def testInvalidSpecs(self):
    """Specs with no work"""

    def assertPolicyRaises(error, spec):
        # Run the Dataset start policy over every task and expect *error*.
        for task in spec.taskIterator():
            self.assertRaises(error, Dataset(), spec, task)

    # A spec with no input dataset at all is malformed.
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset = None
    assertPolicyRaises(WorkQueueWMSpecError, spec)

    # A DBS url with an unknown protocol is malformed.
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    assertPolicyRaises(WorkQueueWMSpecError, spec)

    # A dataset that does not exist yields no work.
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
    assertPolicyRaises(WorkQueueNoWorkError, spec)

    # A run whitelist matching nothing in the dataset yields no work.
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    spec.setRunWhitelist([666])  # not in this dataset
    assertPolicyRaises(WorkQueueNoWorkError, spec)
def testInvalidSpecs(self): """Specs with no work""" # no dataset processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset = None for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dbs url processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com' for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dataset name processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) # invalid run whitelist processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) processingSpec.setRunWhitelist([666]) # not in this dataset for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) # blocks with 0 files are skipped # set all blocks in request to 0 files, no work should be found & an error is raised Globals.GlobalParams.setNumOfFilesPerBlock(0) processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) Globals.GlobalParams.resetParams()
def testInvalidSpecs(self):
    """Specs with no work"""
    # Build each broken spec first, then drive the Dataset policy over it
    # and assert the expected WorkQueue error.
    noDatasetSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(noDatasetSpec).data.input.dataset = None

    badDbsSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(badDbsSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'

    badNameSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(badNameSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET

    badRunsSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    badRunsSpec.setRunWhitelist([666])  # not in this dataset

    cases = [(WorkQueueWMSpecError, noDatasetSpec),   # no dataset
             (WorkQueueWMSpecError, badDbsSpec),      # invalid dbs url
             (WorkQueueNoWorkError, badNameSpec),     # invalid dataset name
             (WorkQueueNoWorkError, badRunsSpec)]     # invalid run whitelist
    for expectedError, spec in cases:
        for task in spec.taskIterator():
            self.assertRaises(expectedError, Dataset(), spec, task)
def testRunWhitelist(self): """ReReco lumi split with Run whitelist""" # get files with multiple runs Globals.GlobalParams.setNumOfRunsPerFile(2) # a large number of lumis to ensure we get multiple runs Globals.GlobalParams.setNumOfLumisPerBlock(10) splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1) Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs) Tier1ReRecoWorkload.setRunWhitelist([2, 3]) inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset() dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier) dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)} for task in Tier1ReRecoWorkload.taskIterator(): units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task) self.assertEqual(1, len(units)) # Check number of jobs in element match number for # dataset in run whitelist jobs = 0 wq_jobs = 0 for unit in units: wq_jobs += unit['Jobs'] runs = dbs[inputDataset.dbsurl].listRuns(unit['Inputs'].keys()[0]) jobs += len([x for x in runs if x in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist()]) self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
def testRunWhitelist(self): """ReReco lumi split with Run whitelist""" # get files with multiple runs Globals.GlobalParams.setNumOfRunsPerFile(2) # a large number of lumis to ensure we get multiple runs Globals.GlobalParams.setNumOfLumisPerBlock(10) splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1) Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs) Tier1ReRecoWorkload.setRunWhitelist([2, 3]) Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs) inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset() dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier) dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)} for task in Tier1ReRecoWorkload.taskIterator(): units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task) self.assertEqual(1, len(units)) # Check number of jobs in element match number for # dataset in run whitelist jobs = 0 wq_jobs = 0 for unit in units: wq_jobs += unit['Jobs'] runLumis = dbs[inputDataset.dbsurl].listRunLumis( dataset=unit['Inputs'].keys()[0]) for run in runLumis: if run in getFirstTask( Tier1ReRecoWorkload).inputRunWhitelist(): jobs += runLumis[run] self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    spec = rerecoWorkload('testParentProcessing', parentProcArgs)
    inputDs = getFirstTask(spec).inputDataset()
    dsPath = "/%s/%s/%s" % (inputDs.primary, inputDs.processed, inputDs.tier)
    readers = {inputDs.dbsurl: DBSReader(inputDs.dbsurl)}
    for task in spec.taskIterator():
        units = Block(**self.splitArgs)(spec, task)
        # one work unit per block in the mocked dataset
        self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(),
                         len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(spec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            # parent flag is set and exactly one parent block is attached
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(1, len(unit['ParentData']))
        self.assertEqual(
            len(units),
            len(readers[inputDs.dbsurl].getFileBlocksInfo(dsPath)))
def testRunWhitelist(self): """ReReco lumi split with Run whitelist""" # get files with multiple runs Globals.GlobalParams.setNumOfRunsPerFile(8) # a large number of lumis to ensure we get multiple runs Globals.GlobalParams.setNumOfLumisPerBlock(20) splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1) Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs) Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs) Tier1ReRecoWorkload.setRunWhitelist([2, 3]) inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset() dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier) dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)} for task in Tier1ReRecoWorkload.taskIterator(): units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task) # Blocks 1 and 2 match run distribution self.assertEqual(2, len(units)) self.assertEqual(len(rejectedWork), 0) # Check number of jobs in element match number for # dataset in run whitelist jobs = 0 wq_jobs = 0 for unit in units: wq_jobs += unit['Jobs'] runLumis = dbs[inputDataset.dbsurl].listRunLumis(block = unit['Inputs'].keys()[0]) for run in runLumis: if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist(): jobs += runLumis[run] self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
def createWorkload(self):
    """
    Create a workload in order to test things.

    Builds the standard ReReco workload with the default test arguments
    and returns it.
    """
    workload = rerecoWorkload("Tier1ReReco", getTestArguments())
    # Fix: the task lookup result was previously bound to an unused local;
    # keep the call as a sanity check that the DataProcessing task exists.
    workload.getTask("DataProcessing")
    return workload
def testLumiMask(self):
    """Lumi mask test"""
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    # Fix: removed four unused locals (a throwaway workload, inputDataset,
    # dataset and dbs/DBSReader) and a misleading "# Block blacklist"
    # comment left over from a copy-paste; the test only exercises the
    # lumi mask below.
    # Apply a lumi mask (run 1, lumi range 1-1) to the spec's top task and
    # check the Block policy still produces a single unit of work.
    lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(lumiWorkload)
    task.data.input.splitting.runs = ['1']
    task.data.input.splitting.lumis = ['1,1']
    units = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
def testLumiMask(self):
    """Lumi mask test"""
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    # Fix: removed four unused locals (a throwaway workload, inputDataset,
    # dataset and dbs/DBSReader) and a misleading "# Block blacklist"
    # comment left over from a copy-paste; the test only exercises the
    # lumi mask below.
    # Apply a lumi mask (run 1, lumi range 1-1) to the spec's top task and
    # check the Block policy still produces a single unit of work.
    lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(lumiWorkload)
    task.data.input.splitting.runs = ['1']
    task.data.input.splitting.lumis = ['1,1']
    units = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
def testInvalidSpecs(self): """Specs with no work""" # no dataset processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset = None for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dbs url processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask( processingSpec ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com' for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dataset name processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) # invalid run whitelist processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) processingSpec.setRunWhitelist([666]) # not in this dataset for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) # blocks with 0 files are skipped # set all blocks in request to 0 files, no work should be found & an error is raised Globals.GlobalParams.setNumOfFilesPerBlock(0) processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) Globals.GlobalParams.resetParams()
def testDataDirectiveFromQueue(self):
    """Test data directive from queue"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        # a directive naming the spec's own dataset is accepted ...
        Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset : []})
        # ... while a directive naming a different dataset must raise.
        # NOTE(review): this assertRaises call passes `dbs` as an extra
        # positional argument that the accepting call above does not —
        # confirm the policy's call signature actually takes it.
        self.assertRaises(RuntimeError, Dataset(**self.splitArgs), Tier1ReRecoWorkload, task, dbs, {dataset + '1': []})
def testDataDirectiveFromQueue(self):
    """Test data directive from queue"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        # a directive naming the spec's own dataset is accepted ...
        Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset: []})
        # ... while a directive naming a different dataset must raise.
        # NOTE(review): this assertRaises call passes `dbs` as an extra
        # positional argument that the accepting call above does not —
        # confirm the policy's call signature actually takes it.
        self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                          Tier1ReRecoWorkload, task, dbs,
                          {dataset + '1': []})
def testIgnore0SizeBlocks(self):
    """Ignore blocks with 0 files"""
    # Make every block in the mocked DBS empty.
    Globals.GlobalParams.setNumOfFilesPerBlock(0)
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    workload.setRunWhitelist([2, 3])
    inputDs = getFirstTask(workload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDs.primary, inputDs.processed, inputDs.tier)
    dbs = {inputDs.dbsurl: DBSReader(inputDs.dbsurl)}
    # With no files anywhere the Block policy must find no work.
    for task in workload.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                          workload, task)
def testIgnore0SizeBlocks(self):
    """Ignore blocks with 0 files"""
    # Empty out every block in the mocked DBS.
    Globals.GlobalParams.setNumOfFilesPerBlock(0)
    spec = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    spec.setRunWhitelist([2, 3])
    topInput = getFirstTask(spec).inputDataset()
    dsName = "/%s/%s/%s" % (topInput.primary, topInput.processed,
                            topInput.tier)
    readers = {topInput.dbsurl: DBSReader(topInput.dbsurl)}
    # No non-empty blocks means the Block policy reports no work at all.
    for task in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError,
                          Block(**self.splitArgs), spec, task)
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow split by Lumi"""
    # Two lumis per slice, dataset-level start policy.
    policyArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    workload.setStartPolicy('Dataset', **policyArgs)
    inputDs = getFirstTask(workload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDs.primary, inputDs.processed,
                             inputDs.tier)
    dbs = {inputDs.dbsurl: DBSReader(inputDs.dbsurl)}
    for task in workload.taskIterator():
        units = Dataset(**policyArgs)(workload, task)
        # one unit for the whole dataset, sliced into 4 lumi-based jobs
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
def setUp(self):
    """
    _setUp_

    Setup some reasonable defaults for the ReReco workflow.
    """
    # LFN bases for unmerged and merged output files
    self.unmergedLFNBase = "/store/backfill/2/unmerged"
    self.mergedLFNBase = "/store/backfill/2"
    # processing/version metadata used by the workload
    self.processingVersion = "v1"
    self.cmsswVersion = "CMSSW_3_4_2_patch1"
    self.acquisitionEra = "WMAgentCommissioining10"
    self.primaryDataset = "MinimumBias"
    # the workload under test, built with the default test arguments
    self.workload = rerecoWorkload("Tier1ReReco", getTestArguments())
    return
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow split by Lumi"""
    # Two lumis per slice over the whole dataset.
    sliceArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDs = getFirstTask(workload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDs.primary, inputDs.processed,
                             inputDs.tier)
    dbs = {inputDs.dbsurl: DBSReader(inputDs.dbsurl)}
    for task in workload.taskIterator():
        units = Dataset(**sliceArgs)(workload, task)
        # a single dataset-level unit containing 2 jobs
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(2, unit['Jobs'])
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    # NOTE(review): dbs is built but never used in this test
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
        # Dataset policy: one unit covering the whole dataset
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(2, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            # the unit's input is keyed by the dataset path
            self.assertEqual(unit['Inputs'].keys(), [dataset])
def testDatasetLocation(self):
    """
    _testDatasetLocation_

    This is a function of all start policies so only test it here
    as there is no StartPolicyInterface unit test
    """
    policyInstance = Block(**self.splitArgs)
    # The policy instance must be called first to initialize the values
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    for task in Tier1ReRecoWorkload.taskIterator():
        policyInstance(Tier1ReRecoWorkload, task)
        # map the DBS instance to the spec's output datasets and ask the
        # policy where those datasets are located
        outputs = policyInstance.getDatasetLocations({'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet' : Tier1ReRecoWorkload.listOutputDatasets()})
        for dataset in outputs:
            # expected sites for the test data — presumably supplied by the
            # mocked location service; verify against the test fixtures
            self.assertEqual(sorted(outputs[dataset]), ['T2_XX_SiteA', 'T2_XX_SiteB'])
    return
def getMergeACDCSpec(self, splittingAlgo="ParentlessMergeBySize", splittingArgs=None):
    """
    _getMergeACDCSpec_

    Get a ACDC spec for the merge task of a ReReco workload.

    splittingAlgo -- job splitting algorithm for the merge task.
    splittingArgs -- dict of splitting parameters; defaults to an empty
                     dict. Fix: the default used to be a shared mutable
                     `{}` literal, which every call (and any mutation by
                     the callee) would share; use the None sentinel
                     instead.
    Returns the truncated ReReco workload.
    """
    if splittingArgs is None:
        splittingArgs = {}
    Tier1ReRecoWorkload = rerecoWorkload(self.workflowName, getTestArguments())
    # Truncate the workload to the merge task so the ACDC spec only
    # resubmits that task.
    Tier1ReRecoWorkload.truncate(
        "ACDC_%s" % self.workflowName,
        "/%s/DataProcessing/DataProcessingMergeRECOoutput" % self.workflowName,
        self.couchUrl,
        self.acdcDBName,
    )
    Tier1ReRecoWorkload.setJobSplittingParameters(
        "/ACDC_%s/DataProcessingMergeRECOoutput" % self.workflowName,
        splittingAlgo,
        splittingArgs,
    )
    return Tier1ReRecoWorkload
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    # One lumi per slice, block-level start policy.
    lumiArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    workload.setStartPolicy('Block', **lumiArgs)
    inputDs = getFirstTask(workload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDs.primary, inputDs.processed,
                             inputDs.tier)
    dbs = {inputDs.dbsurl: DBSReader(inputDs.dbsurl)}
    for task in workload.taskIterator():
        units = Block(**lumiArgs)(workload, task)
        # one unit per block, each sliced into 4 single-lumi jobs
        self.assertEqual(2, len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
        # Block policy: one unit per block in the mocked dataset
        self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
        # unit count must match the number of blocks DBS reports
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
def getProcessingACDCSpec(self, splittingAlgo="LumiBased", splittingArgs=None,
                          setLocationFlag=False):
    """
    _getProcessingACDCSpec_

    Get a ACDC spec for the processing task of a ReReco workload.

    splittingAlgo   -- job splitting algorithm for the processing task.
    splittingArgs   -- dict of splitting parameters; defaults to
                       {"lumis_per_job": 8}. Fix: the default used to be
                       a shared mutable dict literal, which every call
                       (and any mutation by the callee) would share; use
                       the None sentinel instead.
    setLocationFlag -- when True, enable the location data source flag
                       and apply the site whitelist.
    Returns the truncated ReReco workload.
    """
    if splittingArgs is None:
        splittingArgs = {"lumis_per_job": 8}
    Tier1ReRecoWorkload = rerecoWorkload(self.workflowName, getTestArguments())
    # Truncate the workload to the processing task so the ACDC spec only
    # resubmits that task.
    Tier1ReRecoWorkload.truncate(
        "ACDC_%s" % self.workflowName,
        "/%s/DataProcessing" % self.workflowName,
        self.couchUrl,
        self.acdcDBName
    )
    Tier1ReRecoWorkload.setJobSplittingParameters(
        "/ACDC_%s/DataProcessing" % self.workflowName,
        splittingAlgo,
        splittingArgs
    )
    if setLocationFlag:
        Tier1ReRecoWorkload.setLocationDataSourceFlag()
        Tier1ReRecoWorkload.setSiteWhitelist(self.siteWhitelist)
    return Tier1ReRecoWorkload
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    # NOTE(review): dbs is built but never used in this test
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
        # Dataset policy: one unit covering the whole dataset
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            # totals for the whole dataset as reported by the mocked DBS
            self.assertEqual(40, unit['NumberOfLumis'])
            self.assertEqual(20, unit['NumberOfFiles'])
            self.assertEqual(20000, unit['NumberOfEvents'])
def testDataDirectiveFromQueue(self):
    """Test data directive from queue"""
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDs = getFirstTask(workload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDs.primary, inputDs.processed,
                             inputDs.tier)
    dbs = {inputDs.dbsurl: DBSReader(inputDs.dbsurl)}
    for task in workload.taskIterator():
        # Take dataset and force to run over only 1 block
        units = Block(**self.splitArgs)(workload, task,
                                        {dataset + '#1': []})
        self.assertEqual(1, len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(workload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
        # the directive restricted work to one block, so the unit count
        # must differ from the dataset's total block count in DBS
        self.assertNotEqual(
            len(units),
            len(dbs[inputDs.dbsurl].getFileBlocksInfo(dataset)))
def testDataDirectiveFromQueue(self):
    """Test data directive from queue"""
    spec = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    topInput = getFirstTask(spec).inputDataset()
    dsName = "/%s/%s/%s" % (topInput.primary, topInput.processed,
                            topInput.tier)
    readers = {topInput.dbsurl: DBSReader(topInput.dbsurl)}
    for task in spec.taskIterator():
        # Take dataset and force to run over only 1 block
        directive = {dsName + '#1': []}
        units = Block(**self.splitArgs)(spec, task, directive)
        self.assertEqual(1, len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(spec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
        totalBlocks = len(readers[topInput.dbsurl].getFileBlocksInfo(dsName))
        # fewer units than blocks proves the directive restricted the work
        self.assertNotEqual(len(units), totalBlocks)
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    spec = rerecoWorkload('testParentProcessing', parentProcArgs)
    spec.setStartPolicy('Dataset', **self.splitArgs)
    inputDs = getFirstTask(spec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDs.primary, inputDs.processed,
                             inputDs.tier)
    dbs = {inputDs.dbsurl: DBSReader(inputDs.dbsurl)}
    for task in spec.taskIterator():
        units = Dataset(**self.splitArgs)(spec, task)
        # a single dataset-level unit
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
            self.assertEqual(spec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            # parent flag is set but no parent data is attached at the
            # dataset level
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    parentSpec = rerecoWorkload('testParentProcessing', parentProcArgs)
    topInput = getFirstTask(parentSpec).inputDataset()
    dsName = "/%s/%s/%s" % (topInput.primary, topInput.processed,
                            topInput.tier)
    readers = {topInput.dbsurl: DBSReader(topInput.dbsurl)}
    for task in parentSpec.taskIterator():
        units = Dataset(**self.splitArgs)(parentSpec, task)
        self.assertEqual(1, len(units))
        for unit in units:
            # same shape as the plain rereco dataset test ...
            self.assertEqual(2, unit['Jobs'])
            self.assertEqual(parentSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dsName])
            # ... plus the parent flag, with no parent data attached at
            # the dataset level
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)
    inputDataset = getFirstTask(parentProcSpec).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    for task in parentProcSpec.taskIterator():
        units = Block(**self.splitArgs)(parentProcSpec, task)
        # Block policy: one unit per block in the mocked dataset
        self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            # parent flag set with exactly one parent block attached
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(1, len(unit['ParentData']))
        # unit count must match the number of blocks DBS reports
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    # set a non-default request priority so it can be checked per unit
    Tier1ReRecoWorkload.data.request.priority = 69
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
        # Block policy: one unit per block in the mocked dataset
        self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            # the request priority must be propagated to every unit
            self.assertEqual(69, unit['Priority'])
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            # per-block totals from the mocked DBS
            self.assertEqual(4, unit['NumberOfLumis'])
            self.assertEqual(10, unit['NumberOfFiles'])
            self.assertEqual(10000, unit['NumberOfEvents'])
        # unit count must match the number of blocks DBS reports
        self.assertEqual(
            len(units),
            len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
def testWhiteBlackLists(self):
    """Block/Run White/Black lists"""
    # Each case rebuilds the spec with different block/run list arguments
    # and checks which single block the Block policy selects.
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # dataset path: /primary/processed/tier
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    # NOTE(review): dbs is built but never used in this test
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    # Block blacklist
    rerecoArgs2 = {'BlockBlacklist': [dataset + '#1']}
    rerecoArgs2.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(blacklistBlockWorkload)
    units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    # the remaining unit must not be the blacklisted block
    self.assertNotEqual(units[0]['Inputs'].keys(), rerecoArgs2['BlockBlacklist'])
    # Block Whitelist
    rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
    rerecoArgs2['BlockBlacklist'] = []
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(blacklistBlockWorkload)
    units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), rerecoArgs2['BlockWhitelist'])
    # Block Mixed Whitelist
    rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
    rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(blacklistBlockWorkload)
    units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), rerecoArgs2['BlockWhitelist'])
    # Run Whitelist
    rerecoArgs3 = {'RunWhitelist': [1]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    task = getFirstTask(blacklistBlockWorkload)
    units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset + '#1'])
    # Run Blacklist
    rerecoArgs3 = {'RunBlacklist': [2, 3]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    task = getFirstTask(blacklistBlockWorkload)
    units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset + '#1'])
    # Run Mixed Whitelist
    rerecoArgs3 = {'RunBlacklist': [1], 'RunWhitelist': [2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    task = getFirstTask(blacklistBlockWorkload)
    units = Block(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset + '#2'])
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly. The ReReco workflow is just a DataProcessing workflow with
    skims tacked on. This test runs on unmerged RECO output (RECOoutput is
    declared transient, so the skim attaches directly under DataProcessing).
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = getTestArguments()
    dataProcArguments['ProcessingString'] = 'ProcString'
    dataProcArguments['ConfigCacheID'] = recoConfig
    # One FileBased skim reading the (transient) unmerged RECO output
    dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                         "SkimInput": "RECOoutput",
                                         "SkimSplitAlgo": "FileBased",
                                         "SkimSplitArgs": {"files_per_job": 1,
                                                           "include_parents": True},
                                         "ConfigCacheID": skimConfig,
                                         "Scenario": None}]
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    # Transient module: no merge task is created for RECOoutput itself
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
    # Merged LFN base must carry the skim filter and processing string
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.\
                     SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                     Merged.mergedLFNBase,
                     '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')
    # Instantiate the workflow in WMBS and create the top-level subscription
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()
    # Expect SkimA, SkimB and logArchive outputs
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
            "merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
            "output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)
    # logArchive output maps to the same unmerged fileset for both slots
    logArchOutput = skimWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    # Per-skim merge workflows and their output filesets
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" %
            goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")
    # The skim subscribes directly to the unmerged RECO fileset
    topLevelFileset = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    # Merge subscriptions for each skim output
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
            skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" %
            skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize",
                         "Error: Wrong split algo.")
    # Cleanup subscriptions for each skim output
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
            skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s"
            % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algo.")
    # LogCollect subscriptions for each skim merge's logArchive fileset
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(
            name=
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive"
            % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect"
            % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
    return
def createWMSpec(self, name='ReRecoWorkload'): wmspec = rerecoWorkload(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") return wmspec
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on.  We'll only test the skims here.  In this variant the
    skim reads the merged RECO output (DataProcessingMergeoutputRECORECO).
    """
    skimConfig = self.injectSkimConfig()
    dataProcArguments = getTestArguments()
    # One FileBased skim reading the merged outputRECORECO module
    dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                         "SkimInput": "outputRECORECO",
                                         "SkimSplitAlgo": "FileBased",
                                         "SkimSplitArgs": {"files_per_job": 1,
                                                           "include_parents": True},
                                         "ConfigCacheID": skimConfig,
                                         "Scenario": None}]
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
    # Instantiate the workflow in WMBS
    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
    testWMBSHelper.createSubscription()
    skimWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim")
    skimWorkflow.load()
    # Expect SkimA, SkimB and logArchive outputs
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)
    # logArchive maps to the same unmerged fileset for both slots
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    # Per-skim merge workflows and their output filesets
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")
    # The skim subscribes to the merged RECO fileset
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    # Merge subscriptions for each skim output
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")
    # Cleanup subscriptions for each skim output
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")
    # LogCollect subscriptions for each skim merge's logArchive fileset;
    # note this variant uses the EndOfRun log-collect split algorithm
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = skimMergeLogCollect,
                                     workflow = skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")
    return
def testContinuousSplittingSupport(self):
    """Can modify successfully policies for continuous splitting

    Exercises Block policy work-addition: an initial split, a no-new-work
    pass (raises WorkQueueNoWorkError), a pass after the emulator grows the
    dataset by two blocks, and a final no-new-work pass.
    """
    policyInstance = Block(**self.splitArgs)
    self.assertTrue(policyInstance.supportsWorkAddition(),
                    "Block instance should support continuous splitting")
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary,
                             inputDataset.processed,
                             inputDataset.tier)
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    # First pass: one unit per emulated block, stats per unit as asserted
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = policyInstance(Tier1ReRecoWorkload, task)
        self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
        blocks = [] # fill with blocks as we get work units for them
        inputs = {}
        for unit in units:
            blocks.extend(unit['Inputs'].keys())
            inputs.update(unit['Inputs'])
            self.assertEqual(69, unit['Priority'])
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(4, unit['NumberOfLumis'])
            self.assertEqual(10, unit['NumberOfFiles'])
            self.assertEqual(10000, unit['NumberOfEvents'])
        self.assertEqual(len(units),
                         len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
    # Modify the spec and task, get first a fresh policy instance;
    # everything already processed -> no work left, must raise
    policyInstance = Block(**self.splitArgs)
    for task in Tier1ReRecoWorkload.taskIterator():
        policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
        self.assertRaises(WorkQueueNoWorkError, policyInstance,
                          Tier1ReRecoWorkload, task)
    # Pop up 2 more blocks for the dataset with different statistics
    Globals.GlobalParams.setNumOfBlocksPerDataset(
        Globals.GlobalParams.numOfBlocksPerDataset() + 2)
    # Emulator is crooked, it gives the sum of all the files in the dataset not block
    Globals.GlobalParams.setNumOfFilesPerBlock(10)
    # Now run another pass of the Block policy: only the 2 new blocks qualify
    policyInstance = Block(**self.splitArgs)
    policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
    for task in Tier1ReRecoWorkload.taskIterator():
        units, rejectedWork = policyInstance(Tier1ReRecoWorkload, task)
        self.assertEqual(2, len(units))
        self.assertEqual(0, len(rejectedWork))
        for unit in units:
            blocks.extend(unit['Inputs'].keys())
            inputs.update(unit['Inputs'])
            self.assertEqual(69, unit['Priority'])
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(8, unit['NumberOfLumis'])
            self.assertEqual(40, unit['NumberOfFiles'])
            self.assertEqual(40000, unit['NumberOfEvents'])
        self.assertEqual(len(units),
                         len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)) - 2)
    # Run one last time: all inputs processed again -> no work, must raise
    policyInstance = Block(**self.splitArgs)
    for task in Tier1ReRecoWorkload.taskIterator():
        policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
        self.assertRaises(WorkQueueNoWorkError, policyInstance,
                          Tier1ReRecoWorkload, task)
    return
arguments["SkimJobSplitAlgo"] = "FileBased" arguments["SkimJobSplitArgs"] = {"files_per_job": 1, "include_parents": True} if len(sys.argv) != 2: print "Usage:" print "./injectReRecoWorkflow.py PROCESSING_VERSION" sys.exit(1) else: arguments["ProcessingVersion"] = sys.argv[1] connectToDB() workloadName = "ReReco-%s" % arguments["ProcessingVersion"] workloadFile = "reReco-%s.pkl" % arguments["ProcessingVersion"] os.mkdir(workloadName) workload = rerecoWorkload(workloadName, arguments) workloadPath = os.path.join(workloadName, workloadFile) workload.setOwner("*****@*****.**") workload.setSpecUrl(workloadPath) # Build a sandbox using TaskMaker taskMaker = TaskMaker(workload, os.path.join(os.getcwd(), workloadName)) taskMaker.skipSubscription = True taskMaker.processWorkload() workload.save(workloadPath) def injectFilesFromDBS(inputFileset, datasetPath): """ _injectFilesFromDBS_
def createWMSpec(self, name = 'ReRecoWorkload'): wmspec = rerecoWorkload(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") return wmspec
def testWhiteBlackLists(self):
    """Block/Run White/Black lists

    Verify that the Dataset start policy honours block and run
    black/white lists, checking job counts and lumi/file/event totals
    for each scenario.
    """
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary,
                             inputDataset.processed,
                             inputDataset.tier)
    # NOTE(review): dbs is not referenced again in this method — verify
    # before removing.
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    # Block blacklist
    rerecoArgs2 = {'BlockBlacklist': [dataset + '#1']}
    rerecoArgs2.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(20, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])
    # Block Whitelist
    rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
    rerecoArgs2['BlockBlacklist'] = []
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(20, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])
    # Block Mixed Whitelist
    rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
    rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(20, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])
    # Run Whitelist: a single run halves the work (1 job, 5 files)
    rerecoArgs3 = {'RunWhitelist': [1]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
    self.assertEqual(25, units[0]['NumberOfLumis'])
    self.assertEqual(5, units[0]['NumberOfFiles'])
    self.assertEqual(5000, units[0]['NumberOfEvents'])
    # Two whitelisted runs triple the stats (3 jobs, 15 files)
    rerecoArgs3 = {'RunWhitelist': [1, 2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 3.0)
    self.assertEqual(75, units[0]['NumberOfLumis'])
    self.assertEqual(15, units[0]['NumberOfFiles'])
    self.assertEqual(15000, units[0]['NumberOfEvents'])
    # Run Blacklist
    rerecoArgs3 = {'RunBlacklist': [2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(50, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])
    # Run Mixed Whitelist
    rerecoArgs3 = {'RunBlacklist': [1], 'RunWhitelist': [2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(50, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])
def testWhiteBlackLists(self):
    """Block/Run White/Black lists

    Verify that the Dataset start policy honours block and run
    black/white lists; this variant only checks the resulting job count
    (and the input dataset key for the run-list cases).
    """
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary,
                             inputDataset.processed,
                             inputDataset.tier)
    # NOTE(review): dbs is not referenced again in this method — verify
    # before removing.
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    # Block blacklist
    rerecoArgs2 = {'BlockBlacklist' : [dataset + '#1']}
    rerecoArgs2.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 1.0)
    # Block Whitelist
    rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
    rerecoArgs2['BlockBlacklist'] = []
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 1.0)
    # Block Mixed Whitelist
    rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
    rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 1.0)
    # Run Whitelist: single run -> one job over the whole dataset
    rerecoArgs3 = {'RunWhitelist' : [1]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
    # Two whitelisted runs -> two jobs
    rerecoArgs3 = {'RunWhitelist' : [1, 2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    # Run Blacklist
    rerecoArgs3 = {'RunBlacklist' : [2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
    # Run Mixed Whitelist
    rerecoArgs3 = {'RunBlacklist' : [1], 'RunWhitelist' : [2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on.  We'll test the skims and DQMHarvest here.  In this
    variant the skim reads the merged RECO output
    (DataProcessingMergeRECOoutput).
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = getTestArguments()
    dataProcArguments['ProcessingString'] = 'ProcString'
    dataProcArguments['ConfigCacheID'] = recoConfig
    # One FileBased skim reading the RECOoutput module
    dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                         "SkimInput": "RECOoutput",
                                         "SkimSplitAlgo": "FileBased",
                                         "SkimSplitArgs": {"files_per_job": 1,
                                                           "include_parents": True},
                                         "ConfigCacheID": skimConfig,
                                         "Scenario": None}]
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
    # Merged LFN base must carry the skim filter and processing string
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                     tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                     Merged.mergedLFNBase,
                     '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')
    # Instantiate the workflow in WMBS and create the subscriptions
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)
    skimWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()
    # Expect SkimA, SkimB and logArchive outputs
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)
    # logArchive maps to the same unmerged fileset for both slots
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    # Per-skim merge workflows and their output filesets
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")
    # The skim subscribes to the merged RECO fileset
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    # Merge subscriptions for each skim output
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")
    # Cleanup subscriptions for each skim output
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")
    # LogCollect subscriptions for each skim merge's logArchive fileset
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = skimMergeLogCollect,
                                     workflow = skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
    # DQM harvesting: subscription on the merged DQM output
    dqmWorkflow = Workflow(name = "TestWorkload",
                           task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
    topLevelFileset.loadData()
    dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")
    # Harvest task log archive filesets
    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    # LogCollect subscription for the harvest task's log archive
    dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                            task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = dqmHarvestLogCollect,
                                 workflow = dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def makeWorkload(self, schema): return rerecoWorkload(schema['RequestName'], schema).data
def makeWorkload(self, schema): # FIXME return rerecoWorkload(schema['RequestName'], schema).data