Example #1
File: Dataset_t.py Project: ticoann/WMCore
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec,
                              task)

        # invalid dbs url
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(
            processingSpec
        ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec,
                              task)

        # invalid dataset name
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec
                     ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec,
                              task)

        # invalid run whitelist
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec,
                              task)
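A note on the calling convention above: WorkQueue start policies such as Dataset() and Block() are callables, so assertRaises simply invokes them with the spec and task. A minimal sketch of the equivalent explicit call, using only names from this example:

    # Equivalent of one assertRaises line above, spelled out as a sketch:
    # the Dataset start policy instance is called directly and is expected
    # to raise for a spec whose input dataset was removed.
    policy = Dataset()
    try:
        policy(processingSpec, task)
    except WorkQueueWMSpecError:
        pass  # expected: the spec carries no work
    else:
        raise AssertionError("WorkQueueWMSpecError not raised")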
Example #2
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task)

        # invalid dbs url
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task)

        # invalid dataset name
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task)

        # invalid run whitelist
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666]) # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task)

        # blocks with 0 files are skipped
        # set all blocks in request to 0 files, no work should be found & an error is raised
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task)
        Globals.GlobalParams.resetParams()
Example #3
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

        # invalid dbs url
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

        # invalid dataset name
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)

        # invalid run whitelist
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666]) # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)
Example #4
    def testRunWhitelist(self):
        """ReReco lumi split with Run whitelist"""
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(2)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(10)
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(1, len(units))
            # Check number of jobs in element match number for
            # dataset in run whitelist
            jobs = 0
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                runs = dbs[inputDataset.dbsurl].listRuns(unit['Inputs'].keys()[0])
                jobs += len([x for x in runs if x in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist()])
            self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
Example #5
File: Dataset_t.py Project: ticoann/WMCore
 def testRunWhitelist(self):
     """ReReco lumi split with Run whitelist"""
     # get files with multiple runs
     Globals.GlobalParams.setNumOfRunsPerFile(2)
     # a large number of lumis to ensure we get multiple runs
     Globals.GlobalParams.setNumOfLumisPerBlock(10)
     splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setRunWhitelist([2, 3])
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         # Check number of jobs in element match number for
         # dataset in run whitelist
         jobs = 0
         wq_jobs = 0
         for unit in units:
             wq_jobs += unit['Jobs']
             runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                 dataset=unit['Inputs'].keys()[0])
             for run in runLumis:
                 if run in getFirstTask(
                         Tier1ReRecoWorkload).inputRunWhitelist():
                     jobs += runLumis[run]
         self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
Example #6
File: Block_t.py Project: ticoann/WMCore
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(),
                             len(units))
            blocks = []  # fill with blocks as we get work units for them
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #7
File: Block_t.py Project: cinquo/WMCore
    def testRunWhitelist(self):
        """ReReco lumi split with Run whitelist"""
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 0)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            jobs = 0
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(block = unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                        jobs += runLumis[run]
            self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
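The job-count check above reduces to simple arithmetic: with SliceType='NumberOfLumis' and SliceSize=1, each lumi from a whitelisted run becomes one job. A toy illustration (run numbers and lumi counts are made up, not from the emulator):

    # Toy version of the counting loop above: only lumis from whitelisted
    # runs contribute jobs; the totals here are hypothetical.
    runLumis = {1: 5, 2: 10, 3: 10}   # run -> lumis in the block (made up)
    runWhitelist = [2, 3]
    sliceSize = 1                     # SliceSize from splitArgs
    jobs = sum(n for run, n in runLumis.items() if run in runWhitelist)
    assert jobs // sliceSize == 20    # matches the element's 'Jobs' count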
Example #8
    def createWorkload(self):
        """
        Create a workload in order to test things

        """
        workload = rerecoWorkload("Tier1ReReco", getTestArguments())
        rereco = workload.getTask("DataProcessing")
        return workload
Example #9
File: Block_t.py Project: ticoann/WMCore
    def testLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Block blacklist
        lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        task.data.input.splitting.runs = ['1']
        task.data.input.splitting.lumis = ['1,1']
        units = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
Example #10
File: Block_t.py Project: stuartw/WMCore
    def testLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}

        # Block blacklist
        lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        task.data.input.splitting.runs = ['1']
        task.data.input.splitting.lumis = ['1,1']
        units = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
Example #11
File: Block_t.py Project: ticoann/WMCore
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dbs url
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(
            processingSpec
        ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dataset name
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec
                     ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)

        # invalid run whitelist
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)

        # blocks with 0 files are skipped
        # set all blocks in request to 0 files, no work should be found & an error is raised
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)
        Globals.GlobalParams.resetParams()
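One caveat with the emulator override above: if an assertion fails before the last line, resetParams() never runs and the zero-file setting leaks into later tests. A defensive variant, sketched with the same names, wraps the override in try/finally:

    # Sketch: guarantee the emulator is reset even if the check fails.
    Globals.GlobalParams.setNumOfFilesPerBlock(0)
    try:
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)
    finally:
        Globals.GlobalParams.resetParams()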
Example #12
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset : []})
         self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                           Tier1ReRecoWorkload, task, dbs, {dataset + '1': []})
Example #13
File: Dataset_t.py Project: ticoann/WMCore
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset: []})
         self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                           Tier1ReRecoWorkload, task, dbs,
                           {dataset + '1': []})
Example #14
File: Block_t.py Project: cinquo/WMCore
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        Globals.GlobalParams.setNumOfFilesPerBlock(0)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs), Tier1ReRecoWorkload, task)
Example #15
File: Block_t.py Project: ticoann/WMCore
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        Globals.GlobalParams.setNumOfFilesPerBlock(0)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                              Tier1ReRecoWorkload, task)
Example #16
File: Dataset_t.py Project: ticoann/WMCore
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow split by Lumi"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(4, unit['Jobs'])
Example #17
    def setUp(self):
        """
        _setUp_

        Setup some reasonable defaults for the ReReco workflow.
        """
        self.unmergedLFNBase = "/store/backfill/2/unmerged"
        self.mergedLFNBase = "/store/backfill/2"
        self.processingVersion = "v1"
        self.cmsswVersion = "CMSSW_3_4_2_patch1"
        self.acquisitionEra = "WMAgentCommissioining10"
        self.primaryDataset = "MinimumBias"

        self.workload = rerecoWorkload("Tier1ReReco", getTestArguments())
        return
Example #18
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow split by Lumi"""
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 2)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(2, unit['Jobs'])
Example #19
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(2, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [dataset])
Example #20
File: Block_t.py Project: cinquo/WMCore
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            dbsUrl = 'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet'
            outputs = policyInstance.getDatasetLocations(
                {dbsUrl: Tier1ReRecoWorkload.listOutputDatasets()})
            for dataset in outputs:
                self.assertEqual(sorted(outputs[dataset]), ['T2_XX_SiteA', 'T2_XX_SiteB'])
        return
Example #21
    def getMergeACDCSpec(self, splittingAlgo="ParentlessMergeBySize", splittingArgs={}):
        """
        _getMergeACDCSpec_

        Get an ACDC spec for the merge task of a ReReco workload
        """
        Tier1ReRecoWorkload = rerecoWorkload(self.workflowName, getTestArguments())
        Tier1ReRecoWorkload.truncate(
            "ACDC_%s" % self.workflowName,
            "/%s/DataProcessing/DataProcessingMergeRECOoutput" % self.workflowName,
            self.couchUrl,
            self.acdcDBName,
        )
        Tier1ReRecoWorkload.setJobSplittingParameters(
            "/ACDC_%s/DataProcessingMergeRECOoutput" % self.workflowName, splittingAlgo, splittingArgs
        )
        return Tier1ReRecoWorkload
Example #22
File: Block_t.py Project: stuartw/WMCore
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(2, len(units))
            blocks = [] # fill with blocks as we get work units for them
            for unit in units:
                self.assertEqual(4, unit['Jobs'])
Example #23
File: Block_t.py Project: zhiwenuil/WMCore
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
         blocks = [] # fill with blocks as we get work units for them
         for unit in units:
             self.assertEqual(1, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
         self.assertEqual(len(units),
                          len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #24
    def getProcessingACDCSpec(
        self, splittingAlgo="LumiBased", splittingArgs={"lumis_per_job": 8}, setLocationFlag=False
    ):
        """
        _getProcessingACDCSpec_

        Get an ACDC spec for the processing task of a ReReco workload
        """
        Tier1ReRecoWorkload = rerecoWorkload(self.workflowName, getTestArguments())
        Tier1ReRecoWorkload.truncate(
            "ACDC_%s" % self.workflowName, "/%s/DataProcessing" % self.workflowName, self.couchUrl, self.acdcDBName
        )
        Tier1ReRecoWorkload.setJobSplittingParameters(
            "/ACDC_%s/DataProcessing" % self.workflowName, splittingAlgo, splittingArgs
        )
        if setLocationFlag:
            Tier1ReRecoWorkload.setLocationDataSourceFlag()
            Tier1ReRecoWorkload.setSiteWhitelist(self.siteWhitelist)
        return Tier1ReRecoWorkload
Example #25
File: Dataset_t.py Project: ticoann/WMCore
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(4, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [dataset])
             self.assertEqual(40, unit['NumberOfLumis'])
             self.assertEqual(20, unit['NumberOfFiles'])
             self.assertEqual(20000, unit['NumberOfEvents'])
Example #26
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         # Take dataset and force to run over only 1 block
         units = Block(**self.splitArgs)(Tier1ReRecoWorkload, task,
                                         {dataset + '#1' : []})
         self.assertEqual(1, len(units))
         blocks = [] # fill with blocks as we get work units for them
         for unit in units:
             self.assertEqual(1, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
         self.assertNotEqual(
             len(units),
             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #27
File: Block_t.py Project: ticoann/WMCore
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         # Take dataset and force to run over only 1 block
         units = Block(**self.splitArgs)(Tier1ReRecoWorkload, task, {
             dataset + '#1': []
         })
         self.assertEqual(1, len(units))
         blocks = []  # fill with blocks as we get work units for them
         for unit in units:
             self.assertEqual(1, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
         self.assertNotEqual(
             len(units),
             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #28
File: Dataset_t.py Project: ticoann/WMCore
 def testParentProcessing(self):
     """
     test parent processing: should have the same results as rereco test
     with the parent flag and dataset.
     """
     parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)
     parentProcSpec.setStartPolicy('Dataset', **self.splitArgs)
     inputDataset = getFirstTask(parentProcSpec).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in parentProcSpec.taskIterator():
         units = Dataset(**self.splitArgs)(parentProcSpec, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(4, unit['Jobs'])
             self.assertEqual(parentProcSpec, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [dataset])
             self.assertEqual(True, unit['ParentFlag'])
             self.assertEqual(0, len(unit['ParentData']))
Example #29
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units = Dataset(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(2, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(unit['Inputs'].keys(), [dataset])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(0, len(unit['ParentData']))
Example #30
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
            blocks = [] # fill with blocks as we get work units for them
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #31
File: Block_t.py Project: ticoann/WMCore
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.data.request.priority = 69
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(),
                          len(units))
         blocks = []  # fill with blocks as we get work units for them
         for unit in units:
             self.assertEqual(69, unit['Priority'])
             self.assertEqual(1, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(4, unit['NumberOfLumis'])
             self.assertEqual(10, unit['NumberOfFiles'])
             self.assertEqual(10000, unit['NumberOfEvents'])
         self.assertEqual(
             len(units),
             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #32
File: Block_t.py Project: ticoann/WMCore
    def testWhiteBlackLists(self):
        """Block/Run White/Black lists"""
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Block blacklist
        rerecoArgs2 = {'BlockBlacklist': [dataset + '#1']}
        rerecoArgs2.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(blacklistBlockWorkload)
        units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertNotEqual(units[0]['Inputs'].keys(),
                            rerecoArgs2['BlockBlacklist'])

        # Block Whitelist
        rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
        rerecoArgs2['BlockBlacklist'] = []
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(blacklistBlockWorkload)
        units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(),
                         rerecoArgs2['BlockWhitelist'])

        # Block Mixed Whitelist
        rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
        rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(blacklistBlockWorkload)
        units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(),
                         rerecoArgs2['BlockWhitelist'])

        # Run Whitelist
        rerecoArgs3 = {'RunWhitelist': [1]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        task = getFirstTask(blacklistBlockWorkload)
        units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset + '#1'])

        # Run Blacklist
        rerecoArgs3 = {'RunBlacklist': [2, 3]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        task = getFirstTask(blacklistBlockWorkload)
        units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset + '#1'])

        # Run Mixed Whitelist
        rerecoArgs3 = {'RunBlacklist': [1], 'RunWhitelist': [2]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        task = getFirstTask(blacklistBlockWorkload)
        units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset + '#2'])
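The mixed cases above pin down the resolution rule: a whitelist first narrows the candidate blocks or runs, then the blacklist removes from what is left. A hypothetical stand-alone filter mirroring that behaviour (not WMCore code; block names invented for illustration):

    # Hypothetical filter reproducing the mixed white/blacklist behaviour
    # exercised above.
    def filterBlocks(blocks, whitelist=None, blacklist=None):
        candidates = [b for b in blocks if not whitelist or b in whitelist]
        return [b for b in candidates if not blacklist or b not in blacklist]

    blocks = ['/Prim/Proc/TIER#1', '/Prim/Proc/TIER#2']
    assert filterBlocks(blocks,
                        whitelist=[blocks[1]],
                        blacklist=[blocks[0]]) == [blocks[1]]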
Example #33
    def testReRecoDroppingRECO(self):
        """
        _testReRecoDroppingRECO_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on. This test runs on unmerged RECO output.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments['ProcessingString'] = 'ProcString'
        dataProcArguments['ConfigCacheID'] = recoConfig
        dataProcArguments["SkimConfigs"] = [{
            "SkimName": "SomeSkim",
            "SkimInput": "RECOoutput",
            "SkimSplitAlgo": "FileBased",
            "SkimSplitArgs": {
                "files_per_job": 1,
                "include_parents": True
            },
            "ConfigCacheID": skimConfig,
            "Scenario": None
        }]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["TransientOutputModules"] = ["RECOoutput"]

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.\
                         SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                         Merged.mergedLFNBase,
                         '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "DataProcessing",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" %
                goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
                skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s"
                % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmerged,
                                             workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"],
                             "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
                skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s"
                % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(
                name=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive"
                % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect"
                % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                         workflow=skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
Example #34
 def createWMSpec(self, name='ReRecoWorkload'):
     wmspec = rerecoWorkload(name, rerecoArgs)
     wmspec.setSpecUrl("/path/to/workload")
     return wmspec
Example #35
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on.  We'll only test the skims here.
        """
        skimConfig = self.injectSkimConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                             "SkimInput": "outputRECORECO",
                                             "SkimSplitAlgo": "FileBased",
                                             "SkimSplitArgs": {"files_per_job": 1,
                                                               "include_parents": True},
                                             "ConfigCacheID": skimConfig,
                                             "Scenario": None}]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
            logCollectSub.loadData()
            
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                             "Error: Wrong split algo.")

        return
Example #36
File: Block_t.py Project: cinquo/WMCore
    def testContinuousSplittingSupport(self):
        """Can modify successfully policies for continuous splitting"""
        policyInstance = Block(**self.splitArgs)
        self.assertTrue(policyInstance.supportsWorkAddition(),
                        "Block instance should support continuous splitting")
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, _ = policyInstance(Tier1ReRecoWorkload, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
            blocks = [] # fill with blocks as we get work units for them
            inputs = {}
            for unit in units:
                blocks.extend(unit['Inputs'].keys())
                inputs.update(unit['Inputs'])
                self.assertEqual(69, unit['Priority'])
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(4, unit['NumberOfLumis'])
                self.assertEqual(10, unit['NumberOfFiles'])
                self.assertEqual(10000, unit['NumberOfEvents'])
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))

        # Modify the spec and task; first get a fresh policy instance
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance, Tier1ReRecoWorkload, task)

        # Add 2 more blocks to the dataset, with different statistics
        Globals.GlobalParams.setNumOfBlocksPerDataset(Globals.GlobalParams.numOfBlocksPerDataset() + 2)
        Globals.GlobalParams.setNumOfFilesPerBlock(10)  # the emulator is inaccurate: it reports the dataset-wide file total, not files per block

        # Now run another pass of the Block policy
        policyInstance = Block(**self.splitArgs)
        policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = policyInstance(Tier1ReRecoWorkload, task)
            self.assertEqual(2, len(units))
            self.assertEqual(0, len(rejectedWork))
            for unit in units:
                blocks.extend(unit['Inputs'].keys())
                inputs.update(unit['Inputs'])
                self.assertEqual(69, unit['Priority'])
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(8, unit['NumberOfLumis'])
                self.assertEqual(40, unit['NumberOfFiles'])
                self.assertEqual(40000, unit['NumberOfEvents'])
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)) - 2)

        # Run one last time
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance, Tier1ReRecoWorkload, task)

        return
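The work-addition cycle exercised above always follows the same sequence: split once, record which inputs were handed out, seed a fresh policy instance with them, and split again so that only new blocks produce work units. A condensed sketch of that cycle, assuming the Block policy, workload, and emulator setup from this test:

    # Condensed continuous-splitting cycle (assumes the setup above).
    processed = {}
    policy = Block(**self.splitArgs)
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = policy(Tier1ReRecoWorkload, task)
        for unit in units:
            processed.update(unit['Inputs'])  # remember queued blocks

    # A later pass excludes everything already processed; if no new
    # blocks appeared, the policy raises WorkQueueNoWorkError instead.
    policy = Block(**self.splitArgs)
    policy.modifyPolicyForWorkAddition({'ProcessedInputs': processed.keys()})
    for task in Tier1ReRecoWorkload.taskIterator():
        units, rejected = policy(Tier1ReRecoWorkload, task)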
Example #39
arguments["SkimJobSplitAlgo"] = "FileBased"
arguments["SkimJobSplitArgs"] = {"files_per_job": 1, "include_parents": True}

if len(sys.argv) != 2:
    print "Usage:"
    print "./injectReRecoWorkflow.py PROCESSING_VERSION"
    sys.exit(1)
else:
    arguments["ProcessingVersion"] = sys.argv[1]

connectToDB()

workloadName = "ReReco-%s" % arguments["ProcessingVersion"]
workloadFile = "reReco-%s.pkl" % arguments["ProcessingVersion"]
os.mkdir(workloadName)
workload = rerecoWorkload(workloadName, arguments)
workloadPath = os.path.join(workloadName, workloadFile)
workload.setOwner("*****@*****.**")
workload.setSpecUrl(workloadPath)

# Build a sandbox using TaskMaker
taskMaker = TaskMaker(workload, os.path.join(os.getcwd(), workloadName))
taskMaker.skipSubscription = True
taskMaker.processWorkload()

workload.save(workloadPath)

def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_
Example #40
    def createWMSpec(self, name = 'ReRecoWorkload'):
        wmspec = rerecoWorkload(name, rerecoArgs)
        wmspec.setSpecUrl("/path/to/workload")
        return wmspec
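For context, a caller would feed the spec produced by this helper straight into a start policy, along the lines of this sketch (reusing the Block policy and splitArgs from the surrounding examples):

    wmspec = self.createWMSpec('ReRecoWorkload')
    for task in wmspec.taskIterator():
        units, _ = Block(**self.splitArgs)(wmspec, task)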
Example #42
File: Dataset_t.py Project: ticoann/WMCore
    def testWhiteBlackLists(self):
        """Block/Run White/Black lists"""
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Block blacklist
        rerecoArgs2 = {'BlockBlacklist': [dataset + '#1']}
        rerecoArgs2.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Jobs'], 2.0)
        self.assertEqual(20, units[0]['NumberOfLumis'])
        self.assertEqual(10, units[0]['NumberOfFiles'])
        self.assertEqual(10000, units[0]['NumberOfEvents'])

        # Block Whitelist
        rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
        rerecoArgs2['BlockBlacklist'] = []
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Jobs'], 2.0)
        self.assertEqual(20, units[0]['NumberOfLumis'])
        self.assertEqual(10, units[0]['NumberOfFiles'])
        self.assertEqual(10000, units[0]['NumberOfEvents'])

        # Block Mixed White/Blacklist
        rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
        rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Jobs'], 2.0)
        self.assertEqual(20, units[0]['NumberOfLumis'])
        self.assertEqual(10, units[0]['NumberOfFiles'])
        self.assertEqual(10000, units[0]['NumberOfEvents'])

        # Run Whitelist
        rerecoArgs3 = {'RunWhitelist': [1]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 1.0)
        self.assertEqual(25, units[0]['NumberOfLumis'])
        self.assertEqual(5, units[0]['NumberOfFiles'])
        self.assertEqual(5000, units[0]['NumberOfEvents'])

        rerecoArgs3 = {'RunWhitelist': [1, 2]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 3.0)
        self.assertEqual(75, units[0]['NumberOfLumis'])
        self.assertEqual(15, units[0]['NumberOfFiles'])
        self.assertEqual(15000, units[0]['NumberOfEvents'])

        # Run Blacklist
        rerecoArgs3 = {'RunBlacklist': [2]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 2.0)
        self.assertEqual(50, units[0]['NumberOfLumis'])
        self.assertEqual(10, units[0]['NumberOfFiles'])
        self.assertEqual(10000, units[0]['NumberOfEvents'])

        # Run Mixed White/Blacklist
        rerecoArgs3 = {'RunBlacklist': [1], 'RunWhitelist': [2]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 2.0)
        self.assertEqual(50, units[0]['NumberOfLumis'])
        self.assertEqual(10, units[0]['NumberOfFiles'])
        self.assertEqual(10000, units[0]['NumberOfEvents'])
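Note the argument-merging pattern used throughout this test: the list-specific keys go into a fresh dict and the shared rerecoArgs are merged in afterwards, so on any key collision the base arguments win. If rerecoArgs ever carried its own BlockBlacklist, it would silently override the test's value; reversing the merge order, as in this sketch, makes the override explicit:

    # dict.update() lets later values win, so apply overrides last.
    override = {'BlockBlacklist': [dataset + '#1']}
    args = dict(rerecoArgs)  # start from the shared base arguments
    args.update(override)    # then apply the test-specific overrides
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', args)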
Example #43
    def testWhiteBlackLists(self):
        """Block/Run White/Black lists"""
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}

        # Block blacklist
        rerecoArgs2 = {'BlockBlacklist' : [dataset + '#1']}
        rerecoArgs2.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Jobs'], 1.0)

        # Block Whitelist
        rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
        rerecoArgs2['BlockBlacklist'] = []
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Jobs'], 1.0)

        # Block Mixed Whitelist
        rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
        rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Jobs'], 1.0)

        # Run Whitelist
        rerecoArgs3 = {'RunWhitelist' : [1]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 1.0)

        rerecoArgs3 = {'RunWhitelist' : [1, 2]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 2.0)

        # Run Blacklist
        rerecoArgs3 = {'RunBlacklist' : [2]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 1.0)

        # Run Mixed Whitelist
        rerecoArgs3 = {'RunBlacklist' : [1], 'RunWhitelist' : [2]}
        rerecoArgs3.update(rerecoArgs)
        blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
        task = getFirstTask(blacklistBlockWorkload)
        units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(units[0]['Inputs'].keys(), [dataset])
        self.assertEqual(units[0]['Jobs'], 1.0)
Example #44
File: ReReco_t.py Project: cinquo/WMCore
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on.  We'll test the skims and DQMHarvest here.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments['ProcessingString'] = 'ProcString'
        dataProcArguments['ConfigCacheID'] = recoConfig
        dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                             "SkimInput": "RECOoutput",
                                             "SkimSplitAlgo": "FileBased",
                                             "SkimSplitArgs": {"files_per_job": 1,
                                                               "include_parents": True},
                                             "ConfigCacheID": skimConfig,
                                             "Scenario": None}]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                         tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                         Merged.mergedLFNBase,
                         '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        dqmWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
        topLevelFileset.loadData()

        dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #45
    def makeWorkload(self, schema):
        return rerecoWorkload(schema['RequestName'], schema).data
Example #46
    def makeWorkload(self, schema):
        # FIXME
        return rerecoWorkload(schema['RequestName'], schema).data
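Both versions return the raw .data section of the workload rather than the helper object, presumably so callers can inspect or persist the bare configuration directly. A hedged usage sketch (the schema contents simply extend the rerecoArgs pattern used elsewhere in these examples):

    schema = dict(rerecoArgs)
    schema['RequestName'] = 'ReRecoWorkload'
    workloadData = self.makeWorkload(schema)  # bare config data, not a helper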