def testRunWhitelist(self): """ReReco lumi split with Run whitelist""" # get files with multiple runs Globals.GlobalParams.setNumOfRunsPerFile(8) # a large number of lumis to ensure we get multiple runs Globals.GlobalParams.setNumOfLumisPerBlock(20) splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1) rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) factory = ReRecoWorkloadFactory() Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs) Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs) Tier1ReRecoWorkload.setRunWhitelist([180899, 180992]) inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset() dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier) dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)} for task in Tier1ReRecoWorkload.taskIterator(): units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task) # Blocks 1 and 2 match run distribution self.assertEqual(2, len(units)) self.assertEqual(len(rejectedWork), 45) # Check number of jobs in element match number for # dataset in run whitelist jobs = 0 wq_jobs = 0 for unit in units: wq_jobs += unit['Jobs'] # This fails. listRunLumis does not work correctly with DBS3, returning None for the # of lumis in a run runLumis = dbs[inputDataset.dbsurl].listRunLumis(block = unit['Inputs'].keys()[0]) for run in runLumis: if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist(): jobs += runLumis[run] self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
def testInvalidSpecs(self):
    """Ensure specs that can produce no work raise the proper errors."""
    # spec without an input dataset
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset = None
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Block(), spec, wmTask)
    # spec with a malformed dbs url
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Block(), spec, wmTask)
    # spec pointing at a dataset that does not exist
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(), spec, wmTask)
    # run whitelist with no overlap with the dataset
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    spec.setRunWhitelist([666])  # not in this dataset
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(), spec, wmTask)
    # blocks with 0 files are skipped
    # set all blocks in request to 0 files, no work should be found & an error is raised
    Globals.GlobalParams.setNumOfFilesPerBlock(0)
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(), spec, wmTask)
    Globals.GlobalParams.resetParams()
def testRunWhitelist(self):
    """
    ReReco lumi split with Run whitelist
    This test may not do much of anything anymore since listRunLumis is not in DBS3

    Fixed: removed a leftover debug `print` statement and the dead local
    `dataset` (it was computed but never used).
    """
    # get files with multiple runs
    Globals.GlobalParams.setNumOfRunsPerFile(2)
    # a large number of lumis to ensure we get multiple runs
    Globals.GlobalParams.setNumOfLumisPerBlock(10)
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setRunWhitelist([2, 3])
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        # Check number of jobs in element match number for
        # dataset in run whitelist
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
            for run in runLumis:
                if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                    # This is what it is with DBS3 unless we calculate it
                    self.assertEqual(runLumis[run], None)
        self.assertEqual(75, int(wq_jobs))
def testRunWhitelist(self):
    """ReReco lumi split with Run whitelist.

    Fixed: removed the dead local `dataset` (computed but never used).
    """
    # get files with multiple runs
    Globals.GlobalParams.setNumOfRunsPerFile(8)
    # a large number of lumis to ensure we get multiple runs
    Globals.GlobalParams.setNumOfLumisPerBlock(20)
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
    Tier1ReRecoWorkload.setRunWhitelist([2, 3])
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
        # Blocks 1 and 2 match run distribution
        self.assertEqual(2, len(units))
        self.assertEqual(len(rejectedWork), 0)
        # Check number of jobs in element match number for
        # dataset in run whitelist
        jobs = 0
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            runLumis = dbs[inputDataset.dbsurl].listRunLumis(block=unit['Inputs'].keys()[0])
            for run in runLumis:
                if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                    jobs += runLumis[run]
        self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
def testInvalidSpecs(self):
    """Ensure specs that can yield no work raise the expected errors."""
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    # spec without an input dataset
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset = None
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Block(), spec, wmTask)
    # spec with a malformed dbs url
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    for wmTask in spec.taskIterator():
        self.assertRaises(DBSReaderError, Block(), spec, wmTask)
    # spec pointing at a dataset that does not exist
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
    for wmTask in spec.taskIterator():
        self.assertRaises(DBSReaderError, Block(), spec, wmTask)
    # run whitelist that overlaps nothing in the dataset
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    spec.setRunWhitelist([666])  # not in this dataset
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(), spec, wmTask)
def testInvalidSpecs(self):
    """Ensure DQM harvesting specs with no work raise the expected errors."""
    dqmHarvArgs = getRequestArgs()
    sites = ["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"]
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    factory = DQMHarvestWorkloadFactory()
    # workload without an input dataset
    workload = factory.factoryWorkloadConstruction('NoInputDatasetTest', dqmHarvArgs)
    workload.setSiteWhitelist(sites)
    getFirstTask(workload).data.input.dataset = None
    for wmTask in workload.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), workload, wmTask)
    # workload pointing at a dataset that does not exist
    workload = factory.factoryWorkloadConstruction('InvalidInputDatasetTest', dqmHarvArgs)
    workload.setSiteWhitelist(sites)
    getFirstTask(workload).data.input.dataset.name = '/MinimumBias/FAKE-Filter-v1/RECO'
    for wmTask in workload.taskIterator():
        self.assertRaises(DBSReaderError, Dataset(), workload, wmTask)
    # run whitelist that overlaps nothing in the dataset
    workload = factory.factoryWorkloadConstruction('InvalidRunNumberTest', dqmHarvArgs)
    workload.setSiteWhitelist(sites)
    workload.setRunWhitelist([666])  # not in this dataset
    for wmTask in workload.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), workload, wmTask)
def testRunWhitelist(self):
    """ReReco lumi split honouring the run whitelist."""
    # ensure files span several runs
    Globals.GlobalParams.setNumOfRunsPerFile(2)
    # plenty of lumis so multiple runs show up per block
    Globals.GlobalParams.setNumOfLumisPerBlock(10)
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    workload.setRunWhitelist([2, 3])
    inputDataset = getFirstTask(workload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in workload.taskIterator():
        units = Dataset(**splitArgs)(workload, task)
        self.assertEqual(1, len(units))
        # jobs advertised by the element must equal the number of
        # whitelisted runs present in the input
        jobs = 0
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            runs = dbs[inputDataset.dbsurl].listRuns(unit['Inputs'].keys()[0])
            whitelist = getFirstTask(workload).inputRunWhitelist()
            jobs += sum(1 for r in runs if r in whitelist)
        self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
def testRunWhitelist(self):
    """
    ReReco lumi split with Run whitelist
    This test may not do much of anything anymore since listRunLumis is not in DBS3
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    workload.setRunWhitelist([181061, 181175])
    workload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(workload).inputDataset()
    dbsDict = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in workload.taskIterator():
        units, _ = Dataset(**splitArgs)(workload, task)
        self.assertEqual(1, len(units))
        # total jobs advertised across the (single) unit
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            runLumis = dbsDict[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
            whitelist = getFirstTask(workload).inputRunWhitelist()
            for run in runLumis:
                if run in whitelist:
                    # This is what it is with DBS3 unless we calculate it
                    self.assertEqual(runLumis[run], None)
        self.assertEqual(250, int(wq_jobs))
def testRunWhitelist(self):
    """
    ReReco lumi split with Run whitelist
    This test may not do much of anything anymore since listRunLumis is not in DBS3
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    workload.setStartPolicy('Block', **splitArgs)
    workload.setRunWhitelist([180899, 180992])
    inputDataset = getFirstTask(workload).inputDataset()
    dbsDict = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in workload.taskIterator():
        units, rejectedWork = Block(**splitArgs)(workload, task)
        # Blocks 1 and 2 match run distribution
        self.assertEqual(2, len(units))
        self.assertEqual(len(rejectedWork), 45)
        # Check number of jobs in element match number for
        # dataset in run whitelist
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            # This fails. listRunLumis does not work correctly with DBS3,
            # returning None for the # of lumis in a run
            runLumis = dbsDict[inputDataset.dbsurl].listRunLumis(block=unit['Inputs'].keys()[0])
            whitelist = getFirstTask(workload).inputRunWhitelist()
            for run in runLumis:
                if run in whitelist:
                    # This is what it is with DBS3 unless we calculate it
                    self.assertEqual(runLumis[run], None)
        self.assertEqual(2, int(wq_jobs))
def testInvalidSpecs(self):
    """Ensure specs that can yield no work raise the expected errors."""
    # spec without an input dataset
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset = None
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, wmTask)
    # spec with a malformed dbs url
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, wmTask)
    # spec pointing at a dataset that does not exist
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, wmTask)
    # run whitelist that overlaps nothing in the dataset
    spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    spec.setRunWhitelist([666])  # not in this dataset
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, wmTask)
def testInvalidSpecs(self):
    """Ensure specs that can yield no work raise the expected errors."""
    def freshSpec():
        # build a new spec assigned to a single site for each scenario
        return rerecoWorkload('ReRecoWorkload', rerecoArgs,
                              assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    # spec without an input dataset
    spec = freshSpec()
    getFirstTask(spec).data.input.dataset = None
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Block(), spec, wmTask)
    # spec with a malformed dbs url
    spec = freshSpec()
    getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    for wmTask in spec.taskIterator():
        self.assertRaises(DBSReaderError, Block(), spec, wmTask)
    # spec pointing at a dataset that does not exist
    spec = freshSpec()
    getFirstTask(spec).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
    for wmTask in spec.taskIterator():
        self.assertRaises(DBSReaderError, Block(), spec, wmTask)
    # run whitelist that overlaps nothing in the dataset
    spec = freshSpec()
    spec.setRunWhitelist([666])  # not in this dataset
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(), spec, wmTask)
def testInvalidSpecs(self):
    """Ensure specs that can yield no work raise the expected errors."""
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    # spec without an input dataset
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset = None
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, wmTask)
    # spec with a malformed dbs url
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, wmTask)
    # spec pointing at a dataset that does not exist
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(spec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, wmTask)
    # run whitelist that overlaps nothing in the dataset
    spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    spec.setRunWhitelist([666])  # not in this dataset
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, wmTask)
def testInvalidSpecs(self):
    """A MonteCarlo spec requesting no events raises WorkQueueNoWorkError."""
    spec = monteCarloWorkload('testProcessingInvalid', mcArgs)
    # zero requested events means there is no work to queue
    getFirstTask(spec).addProduction(totalevents=0)
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, MonteCarlo(), spec, wmTask)
def testMultiMergeProductionWorkload(self):
    """Multi merge production workload.

    Fixed: compare len(units) against the int literal 10 rather than the
    float 10.0 (a length is always an int).
    """
    getFirstTask(MultiMergeProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    for task in MultiMergeProductionWorkload.taskIterator():
        units = MonteCarlo(**self.splitArgs)(MultiMergeProductionWorkload, task)
        self.assertEqual(10, len(units))
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(unit['WMSpec'], MultiMergeProductionWorkload)
            self.assertEqual(unit['Task'], task)
def testInvalidSpecs(self):
    """MonteCarlo specs that can produce no valid work raise errors."""
    # zero requested events -> no work
    spec = monteCarloWorkload('testProcessingInvalid', mcArgs)
    getFirstTask(spec).addProduction(totalevents=0)
    for wmTask in spec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, MonteCarlo(), spec, wmTask)
    # negative slice size -> invalid spec
    badSpec = monteCarloWorkload('testProdInvalid', mcArgs)
    badSpec.data.policies.start.SliceSize = -100
    for wmTask in badSpec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, MonteCarlo(), badSpec, wmTask)
def _getLHEProductionWorkload(self, splitArgs):
    """Build an LHE production workload configured from *splitArgs*."""
    totalevents = 1010
    workload = monteCarloWorkload('MonteCarloWorkload', mcArgs)
    task = getFirstTask(workload)
    workload.setJobSplittingParameters(
        task.getPathName(), 'EventBased',
        {'events_per_job': splitArgs['SliceSize'],
         'events_per_lumi': splitArgs['SubSliceSize']})
    task.addProduction(totalEvents=totalevents)
    task.setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    return workload
def testShiftedStartSplitting(self):
    """
    _testShiftedStartSplitting_

    Make sure that splitting by event plus events in per lumi works when
    the first event and lumi is not 1
    """
    totalevents = 542674
    splitArgs = dict(SliceType = 'NumEvents', SliceSize = 47, MaxJobsPerElement = 5,
                     SubSliceType = 'NumEventsPerLumi', SubSliceSize = 13)
    LHEProductionWorkload = monteCarloWorkload('MonteCarloWorkload', mcArgs)
    LHEProductionWorkload.setJobSplittingParameters(
        getFirstTask(LHEProductionWorkload).getPathName(),
        'EventBased',
        {'events_per_job': splitArgs['SliceSize'],
         'events_per_lumi': splitArgs['SubSliceSize']})
    getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    getFirstTask(LHEProductionWorkload).addProduction(totalevents = totalevents)
    # start counting from event 50 / lumi 100 rather than 1/1
    getFirstTask(LHEProductionWorkload).setFirstEventAndLumi(50, 100)
    # NOTE(review): duplicate of the setSiteWhitelist call above; kept as-is
    getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    for task in LHEProductionWorkload.taskIterator():
        units, _ = MonteCarlo(**splitArgs)(LHEProductionWorkload, task)
        SliceSize = LHEProductionWorkload.startPolicyParameters()['SliceSize']
        # each element holds up to MaxJobsPerElement jobs of SliceSize events
        self.assertEqual(math.ceil(float(totalevents) / (SliceSize * splitArgs['MaxJobsPerElement'])),
                         len(units))
        first_event = 50
        first_lumi = 100
        first_run = 1
        lumis_per_job = int(math.ceil(float(SliceSize) / splitArgs['SubSliceSize']))
        for unit in units:
            self.assertTrue(unit['Jobs'] <= splitArgs['MaxJobsPerElement'])
            self.assertEqual(unit['WMSpec'], LHEProductionWorkload)
            self.assertEqual(unit['Task'], task)
            # each unit's mask starts where the previous one stopped
            self.assertEqual(unit['Mask']['FirstEvent'], first_event)
            self.assertEqual(unit['Mask']['FirstLumi'], first_lumi)
            last_event = first_event + (SliceSize * unit['Jobs']) - 1
            last_lumi = first_lumi + (lumis_per_job * unit['Jobs']) - 1
            if last_event > totalevents:
                # this should be the last unit of work
                # the tail element covers only the remaining events/lumis
                last_event = totalevents + 50 - 1
                last_lumi = first_lumi
                last_lumi += math.ceil(((last_event - first_event + 1) % SliceSize) /
                                       splitArgs['SubSliceSize'])
                last_lumi += (lumis_per_job * (unit['Jobs'] - 1)) - 1
            self.assertEqual(unit['Mask']['LastEvent'], last_event)
            self.assertEqual(unit['Mask']['LastLumi'], last_lumi)
            self.assertEqual(unit['Mask']['FirstRun'], first_run)
            first_event = last_event + 1
            first_lumi = last_lumi + 1
        # final unit must stop exactly at the shifted event total
        self.assertEqual(unit['Mask']['LastEvent'], totalevents + 50 - 1)
def testMultiTaskProcessingWorkload(self):
    """Multi Task Processing Workflow"""
    getFirstTask(MultiTaskProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    taskCount = 0
    for wmTask in MultiTaskProductionWorkload.taskIterator():
        taskCount += 1
        wmTask.setFirstEventAndLumi(1, 1)
        units, _ = MonteCarlo(**self.splitArgs)(MultiTaskProductionWorkload, wmTask)
        # each successive task contributes another 10 units
        self.assertEqual(10 * taskCount, len(units))
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(unit['WMSpec'], MultiTaskProductionWorkload)
            self.assertEqual(unit['Task'], wmTask)
    self.assertEqual(taskCount, 2)
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                              assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    workload.data.request.priority = 69
    inputDataset = getFirstTask(workload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for wmTask in workload.taskIterator():
        units, _, _ = Block(**self.splitArgs)(workload, wmTask)
        self.assertEqual(47, len(units))
        for unit in units:
            # priority propagated from the request
            self.assertEqual(69, unit['Priority'])
            self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(workload, unit['WMSpec'])
            self.assertEqual(wmTask, unit['Task'])
            self.assertTrue(1 <= unit['NumberOfLumis'])
            self.assertTrue(1 <= unit['NumberOfFiles'])
            self.assertTrue(0 <= unit['NumberOfEvents'])
        # one unit per file block in the dataset
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].listFileBlocks(dataset)))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.

    Fixed: removed the dead local `blocks = []` (filled nowhere, read nowhere).
    """
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs)
    inputDataset = getFirstTask(parentProcSpec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in parentProcSpec.taskIterator():
        units, _ = Block(**self.splitArgs)(parentProcSpec, task)
        self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            # parent flag set and exactly one parent dataset attached
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(1, len(unit['ParentData']))
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
def notestParentProcessing(self):
    # Does not work with a RAW dataset, need a different workload
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    spec = ReRecoWorkloadFactory().factoryWorkloadConstruction('testParentProcessing', parentProcArgs)
    inputDataset = getFirstTask(spec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for wmTask in spec.taskIterator():
        units, _, _ = Block(**self.splitArgs)(spec, wmTask)
        self.assertEqual(47, len(units))
        for unit in units:
            self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(spec, unit['WMSpec'])
            self.assertEqual(wmTask, unit['Task'])
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(1, len(unit['ParentData']))
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].listFileBlocks(dataset)))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.

    Fixed: removed the dead local `blocks = []` (filled nowhere, read nowhere).
    """
    parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)
    inputDataset = getFirstTask(parentProcSpec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in parentProcSpec.taskIterator():
        units = Block(**self.splitArgs)(parentProcSpec, task)
        self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            # parent flag set and exactly one parent dataset attached
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(1, len(unit['ParentData']))
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
def testWithMaskedBlocks(self):
    """
    _testWithMaskedBlocks_

    Test job splitting with masked blocks
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    workload.data.request.priority = 69
    task = getFirstTask(workload)
    dummyDataset = task.inputDataset()
    task.data.input.splitting.runs = [181061, 180899]
    task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
    lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]], '180899': [[1, 1]], })
    units, dummyRejectedWork = Block(**self.splitArgs)(workload, task)
    # total lumis over all units must equal the lumis selected by the mask
    nLumis = sum(unit['NumberOfLumis'] for unit in units)
    self.assertEqual(len(lumiMask.getLumis()), nLumis)
def notestParentProcessing(self):
    # Does not work with a RAW dataset, need a different workload
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.

    Fixed: removed a leftover `import pdb; pdb.set_trace()` debugger
    breakpoint inside the unit loop.
    """
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs)
    inputDataset = getFirstTask(parentProcSpec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in parentProcSpec.taskIterator():
        units, _ = Block(**self.splitArgs)(parentProcSpec, task)
        self.assertEqual(47, len(units))
        for unit in units:
            self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(1, len(unit['ParentData']))
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.

    Fixed: removed leftover debug output (`from pprint import pprint` /
    `pprint(parentProcArgs2)`) and a commented-out alternative dataset line.
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    parentProcArgs = getReRecoArgs(parent=True)
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    # This dataset does have parents. Adding it here to keep the test going.
    # It seems like "dbs" below is never used
    parentProcArgs2 = {}
    parentProcArgs2.update(parentProcArgs)
    parentProcArgs2.update({'InputDataset': '/Cosmics/ComissioningHI-PromptReco-v1/RECO'})
    parentProcSpec = rerecoWorkload('ReRecoWorkload', parentProcArgs2,
                                    assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    parentProcSpec.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(parentProcSpec).getInputDatasetPath()
    for task in parentProcSpec.taskIterator():
        units, _, _ = Dataset(**splitArgs)(parentProcSpec, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(3993, unit['Jobs'])
            self.assertEqual(7985, unit['NumberOfLumis'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testGetMaskedBlocks(self):
    """
    _testGetMaskedBlocks_

    Check that getMaskedBlocks is returning the correct information
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    workload.data.request.priority = 69
    task = getFirstTask(workload)
    inputDataset = task.inputDataset()
    inputDataset.primary = 'SingleElectron'
    inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
    inputDataset.tier = 'USER'
    task.data.input.splitting.runs = [166921, 166429, 166911]
    task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
    lumiMask = LumiList(compactList={'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]], })
    inputLumis = LumiList(compactList={'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]], })
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = DBSReader(inputDataset.dbsurl)
    maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
    # every file's lumi list must equal the intersection of input and mask
    expected = str(inputLumis & lumiMask)
    for dummyBlock, files in maskedBlocks.iteritems():
        for dummyFile, lumiList in files.iteritems():
            self.assertEqual(str(lumiList), expected)
def testWithMaskedBlocks(self):
    """
    _testWithMaskedBlocks_

    Test job splitting with masked blocks
    """
    workload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                              assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    workload.data.request.priority = 69
    task = getFirstTask(workload)
    dummyDataset = task.inputDataset()
    task.data.input.splitting.runs = [181061, 180899]
    task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
    lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]], '180899': [[1, 1]], })
    units, _, _ = Block(**self.splitArgs)(workload, task)
    # total lumis over all units must equal the lumis selected by the mask
    nLumis = sum(unit['NumberOfLumis'] for unit in units)
    self.assertEqual(len(lumiMask.getLumis()), nLumis)
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.

    Fixed: the config was created from rerecoArgs["CouchDBName"] instead of
    parentProcArgs["CouchDBName"] -- every other parent-processing test in
    this file reads the couch db name from parentProcArgs.
    """
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    # This dataset does have parents. Adding it here to keep the test going.
    # It seems like "dbs" below is never used
    parentProcArgs2 = {}
    parentProcArgs2.update(parentProcArgs)
    parentProcArgs2.update({'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'})
    parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
    parentProcSpec.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(parentProcSpec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in parentProcSpec.taskIterator():
        units, _ = Dataset(**self.splitArgs)(parentProcSpec, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    parentProcArgs = getReRecoArgs(parent=True)
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    # This dataset does have parents. Adding it here to keep the test going.
    # It seems like "dbs" below is never used
    argsWithParents = {}
    argsWithParents.update(parentProcArgs)
    argsWithParents['InputDataset'] = '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'
    spec = ReRecoWorkloadFactory().factoryWorkloadConstruction('testParentProcessing', argsWithParents)
    spec.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(spec).getInputDatasetPath()
    for wmTask in spec.taskIterator():
        units, _ = Dataset(**splitArgs)(spec, wmTask)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(847, unit['Jobs'])
            self.assertEqual(1694, unit['NumberOfLumis'])
            self.assertEqual(spec, unit['WMSpec'])
            self.assertEqual(wmTask, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testWithMaskedBlocks(self):
    """
    _testWithMaskedBlocks_

    Test job splitting with masked blocks: the total number of lumis in the
    produced units must equal the number of lumis selected by the mask.
    """
    Globals.GlobalParams.setNumOfRunsPerFile(3)
    Globals.GlobalParams.setNumOfLumisPerBlock(5)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    task = getFirstTask(Tier1ReRecoWorkload)
    inputDataset = task.inputDataset()
    task.data.input.splitting.runs = [181061, 180899]
    task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
    # NOTE(review): this mask lists run '206371' while the splitting runs above
    # use 181061 -- confirm the mismatch is intentional.
    lumiMask = LumiList(compactList = {'206371': [[1, 50], [60,70]], '180899':[[1,1]], } )
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    units, rejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
    # sum lumis over all produced work units
    nLumis = 0
    for unit in units:
        nLumis += unit['NumberOfLumis']
    self.assertEqual(len(lumiMask.getLumis()), nLumis)
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow

    Block policy must emit one unit per file block of the input dataset,
    each carrying the request priority and sane counters.
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(47, len(units))
        blocks = []  # fill with blocks as we get work units for them
        for unit in units:
            # priority propagated from the request
            self.assertEqual(69, unit['Priority'])
            self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertTrue(1 <= unit['NumberOfLumis'])
            self.assertTrue(1 <= unit['NumberOfFiles'])
            self.assertTrue(0 <= unit['NumberOfEvents'])
        # one unit per block reported by DBS for the dataset
        self.assertEqual(len(units), len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
def testLumiMask(self):
    """Lumi mask test

    With a run/lumi mask restricting to one lumi, the Block policy must
    produce exactly one work unit.
    """
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    # Block blacklist
    lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(lumiWorkload)
    # restrict to run 1, lumi range 1-1
    task.data.input.splitting.runs = ['1']
    task.data.input.splitting.lumis = ['1,1']
    units = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
def _getLHEProductionWorkload(self, splitArgs):
    """Build a small LHE MonteCarlo workload configured from *splitArgs*.

    Sets event-based splitting, a fixed production size and a site whitelist,
    then returns the assembled workload.
    """
    numEvents = 1010
    workload = monteCarloWorkload('MonteCarloWorkload', mcArgs)
    slicing = {'events_per_job': splitArgs['SliceSize'],
               'events_per_lumi': splitArgs['SubSliceSize']}
    workload.setJobSplittingParameters(
        getFirstTask(workload).getPathName(), 'EventBased', slicing)
    getFirstTask(workload).addProduction(totalEvents=numEvents)
    getFirstTask(workload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    return workload
def testMultiTaskProcessingWorkload(self):
    """Multi Task Processing Workflow

    Each task of the multi-task workload must split into units, with the
    cumulative expectation of 10 units per task seen so far.
    """
    count = 0
    getFirstTask(MultiTaskProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    for task in MultiTaskProductionWorkload.taskIterator():
        count += 1
        task.setFirstEventAndLumi(1, 1)
        units, _, _ = MonteCarlo(**self.splitArgs)(MultiTaskProductionWorkload, task)
        self.assertEqual(10 * count, len(units))
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(unit['WMSpec'], MultiTaskProductionWorkload)
            self.assertEqual(unit['Task'], task)
    # workload is expected to contain exactly two top-level tasks
    self.assertEqual(count, 2)
def testShiftedStartSplitting(self):
    """
    _testShiftedStartSplitting_

    Make sure that splitting by event plus events in per lumi works
    when the first event and lumi is not 1

    Walks the produced units and recomputes the expected event/lumi mask
    boundaries, shifted by the configured first event (50) and lumi (100).
    """
    totalevents = 542674
    splitArgs = dict(SliceType='NumEvents', SliceSize=47, MaxJobsPerElement=5,
                     SubSliceType='NumEventsPerLumi', SubSliceSize=13)
    LHEProductionWorkload = taskChainWorkload('MonteCarloWorkload', mcArgs)
    LHEProductionWorkload.setJobSplittingParameters(
        getFirstTask(LHEProductionWorkload).getPathName(), 'EventBased',
        {'events_per_job': splitArgs['SliceSize'],
         'events_per_lumi': splitArgs['SubSliceSize']})
    getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    getFirstTask(LHEProductionWorkload).addProduction(totalEvents=totalevents)
    getFirstTask(LHEProductionWorkload).setFirstEventAndLumi(50, 100)
    getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    for task in LHEProductionWorkload.taskIterator():
        units, _, _ = MonteCarlo(**splitArgs)(LHEProductionWorkload, task)
        SliceSize = LHEProductionWorkload.startPolicyParameters()['SliceSize']
        self.assertEqual(math.ceil(totalevents / (SliceSize * splitArgs['MaxJobsPerElement'])), len(units))
        first_event = 50
        first_lumi = 100
        first_run = 1
        # lumis needed per job when each job has SliceSize events
        lumis_per_job = int(math.ceil(SliceSize / splitArgs['SubSliceSize']))
        for unit in units:
            self.assertTrue(unit['Jobs'] <= splitArgs['MaxJobsPerElement'])
            self.assertEqual(unit['WMSpec'], LHEProductionWorkload)
            self.assertEqual(unit['Task'], task)
            self.assertEqual(unit['Mask']['FirstEvent'], first_event)
            self.assertEqual(unit['Mask']['FirstLumi'], first_lumi)
            last_event = first_event + (SliceSize * unit['Jobs']) - 1
            last_lumi = first_lumi + (lumis_per_job * unit['Jobs']) - 1
            if last_event > totalevents:
                # this should be the last unit of work; re-derive the
                # truncated event/lumi boundaries for the partial final job
                last_event = totalevents + 50 - 1
                last_lumi = first_lumi
                last_lumi += math.ceil(((last_event - first_event + 1) % SliceSize) / splitArgs['SubSliceSize'])
                last_lumi += (lumis_per_job * (unit['Jobs'] - 1)) - 1
            self.assertEqual(unit['Mask']['LastEvent'], last_event)
            self.assertEqual(unit['Mask']['LastLumi'], last_lumi)
            self.assertEqual(unit['Mask']['FirstRun'], first_run)
            first_event = last_event + 1
            first_lumi = last_lumi + 1
        # after the loop, the final unit must end at the shifted total
        self.assertEqual(unit['Mask']['LastEvent'], totalevents + 50 - 1)
def testLumiMask(self):
    """Lumi mask test

    One lumi selected via the splitting mask: expect a single accepted
    unit and 46 rejected blocks.
    """
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # Block blacklist
    lumiWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(lumiWorkload)
    #task.data.input.splitting.runs = ['1']
    task.data.input.splitting.runs = ['180992']
    task.data.input.splitting.lumis = ['1,1']
    units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
    # all other blocks fall outside the mask
    self.assertEqual(len(rejectedWork), 46)
def testExtremeSplits(self):
    """
    _testExtremeSplits_

    Make sure that the protection to avoid going over 2^32 works

    Splits 2^34 events with 2^30-event slices and at most 7 jobs per
    element, then checks each unit's job count and event mask.
    """
    totalevents = 2**34
    splitArgs = dict(SliceType='NumEvents', SliceSize=2**30, MaxJobsPerElement=7)
    LHEProductionWorkload = monteCarloWorkload('MonteCarloWorkload', mcArgs)
    LHEProductionWorkload.setJobSplittingParameters(
        getFirstTask(LHEProductionWorkload).getPathName(), 'EventBased',
        {'events_per_job': splitArgs['SliceSize'],
         'events_per_lumi': splitArgs['SliceSize']})
    getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    getFirstTask(LHEProductionWorkload).addProduction(totalEvents=totalevents)
    getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
    for task in LHEProductionWorkload.taskIterator():
        units, _, _ = MonteCarlo(**splitArgs)(LHEProductionWorkload, task)
        SliceSize = LHEProductionWorkload.startPolicyParameters()['SliceSize']
        self.assertEqual(math.ceil(totalevents / (SliceSize * splitArgs['MaxJobsPerElement'])), len(units))
        self.assertEqual(len(units), 3, "Should produce 3 units")

        unit1 = units[0]
        unit2 = units[1]
        unit3 = units[2]
        self.assertEqual(unit1['Jobs'], 7, 'First unit produced more jobs than expected')
        self.assertEqual(unit1['Mask']['FirstEvent'], 1, 'First unit has a wrong first event')
        self.assertEqual(unit1['Mask']['LastEvent'], 7 * (2**30), 'First unit has a wrong last event')
        self.assertEqual(unit2['Jobs'], 7, 'Second unit produced more jobs than expected')
        self.assertEqual(unit2['Mask']['FirstEvent'], 2**30 + 1, 'Second unit has a wrong first event')
        self.assertEqual(unit2['Mask']['LastEvent'], 8 * (2**30), 'Second unit has a wrong last event')
        self.assertEqual(unit3['Jobs'], 2, 'Third unit produced more jobs than expected')
        self.assertEqual(unit3['Mask']['FirstEvent'], 2 * (2**30) + 1, 'Third unit has a wrong first event')
        # fixed copy-paste defect: this failure message previously said "First unit"
        self.assertEqual(unit3['Mask']['LastEvent'], 4 * (2**30), 'Third unit has a wrong last event')
def testInvalidSpecs(self): """Specs with no work""" # no dataset processingSpec = rerecoWorkload( 'ReRecoWorkload', rerecoArgs, assignArgs={'SiteWhitelist': ['T2_XX_SiteA']}) getFirstTask(processingSpec).data.input.dataset = None for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dbs url processingSpec = rerecoWorkload( 'ReRecoWorkload', rerecoArgs, assignArgs={'SiteWhitelist': ['T2_XX_SiteA']}) getFirstTask( processingSpec ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com' for task in processingSpec.taskIterator(): self.assertRaises(DBSReaderError, Block(), processingSpec, task) # dataset non existent processingSpec = rerecoWorkload( 'ReRecoWorkload', rerecoArgs, assignArgs={'SiteWhitelist': ['T2_XX_SiteA']}) getFirstTask( processingSpec ).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO" for task in processingSpec.taskIterator(): self.assertRaises(DBSReaderError, Block(), processingSpec, task) # invalid run whitelist processingSpec = rerecoWorkload( 'ReRecoWorkload', rerecoArgs, assignArgs={'SiteWhitelist': ['T2_XX_SiteA']}) processingSpec.setRunWhitelist([666]) # not in this dataset for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task)
def testLumiMask(self):
    """Lumi mask test

    Single run/lumi mask yields exactly one unit from the Block policy.
    """
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
    # Block blacklist
    lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(lumiWorkload)
    # restrict to run 1, lumi range 1-1
    task.data.input.splitting.runs = ['1']
    task.data.input.splitting.lumis = ['1,1']
    units = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
def testLumiMask(self):
    """Lumi mask test

    Factory-built workload with a one-lumi mask on run 180992: expect one
    accepted unit and 46 rejected blocks.
    """
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
        'ReRecoWorkload', rerecoArgs2)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # Block blacklist
    lumiWorkload = factory.factoryWorkloadConstruction(
        'ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(lumiWorkload)
    #task.data.input.splitting.runs = ['1']
    task.data.input.splitting.runs = ['180992']
    task.data.input.splitting.lumis = ['1,1']
    units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
    # all other blocks fall outside the mask
    self.assertEqual(len(rejectedWork), 46)
def testIgnore0SizeBlocks(self):
    """Blocks containing zero files must produce no work.

    With every block forced to hold 0 files, the Block policy is expected
    to raise WorkQueueNoWorkError for each task.
    """
    Globals.GlobalParams.setNumOfFilesPerBlock(0)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    workload.setRunWhitelist([2, 3])
    dummyDataset = getFirstTask(workload).inputDataset()
    for task in workload.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs), workload, task)
def testRunWhitelist(self):
    """
    ReReco lumi split with Run whitelist
    This test may not do much of anything anymore since listRunLumis is not in DBS3

    Two runs whitelisted; expect two accepted units, 45 rejected blocks
    and two jobs overall.
    """
    # get files with multiple runs
    Globals.GlobalParams.setNumOfRunsPerFile(8)
    # a large number of lumis to ensure we get multiple runs
    Globals.GlobalParams.setNumOfLumisPerBlock(20)
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()

    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
        'ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
    Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                             inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
        # Blocks 1 and 2 match run distribution
        self.assertEqual(2, len(units))
        self.assertEqual(len(rejectedWork), 45)
        # Check number of jobs in element match number for
        # dataset in run whitelist
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            # listRunLumis does not work correctly with DBS3, returning None
            # for the # of lumis in a run.
            # NOTE: use next(iter(...)) instead of the Python-2-only
            # dict.keys()[0] subscript so this also runs on Python 3.
            runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                block=next(iter(unit['Inputs'])))
            for run in runLumis:
                if run in getFirstTask(
                        Tier1ReRecoWorkload).inputRunWhitelist():
                    # This is what it is with DBS3 unless we calculate it
                    self.assertEqual(runLumis[run], None)
        self.assertEqual(2, int(wq_jobs))
def testInvalidSpecs(self): """Specs with no work""" # no dataset processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset = None for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dbs url processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask( processingSpec ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com' for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dataset name processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) # invalid run whitelist processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) processingSpec.setRunWhitelist([666]) # not in this dataset for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) # blocks with 0 files are skipped # set all blocks in request to 0 files, no work should be found & an error is raised Globals.GlobalParams.setNumOfFilesPerBlock(0) processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs) for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task) Globals.GlobalParams.resetParams()
def testDataDirectiveFromQueue(self):
    """Test data directive from queue

    A directive naming the input dataset is accepted; a directive naming
    an unknown dataset raises RuntimeError.
    """
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        # correct dataset -- must not raise
        Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset: []})
        # unknown dataset -- must raise
        self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                          Tier1ReRecoWorkload, task, dbs, {dataset + '1': []})
def testDataDirectiveFromQueue(self):
    """Test data directive from queue

    A directive naming the input dataset is accepted; a directive naming
    an unknown dataset raises RuntimeError.
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(workload).inputDataset()
    dataset = '/'.join(['', inputDataset.primary, inputDataset.processed, inputDataset.tier])
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in workload.taskIterator():
        # the matching dataset directive must be accepted without error
        Dataset(**self.splitArgs)(workload, task, {dataset: []})
        # a non-existent dataset directive must be rejected
        self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                          workload, task, dbs, {dataset + '1': []})
def testInvalidSpecs(self): """Specs with no work""" # no dataset rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) factory = ReRecoWorkloadFactory() processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset = None for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task) # invalid dataset name processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task) # invalid run whitelist processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs) processingSpec.setRunWhitelist([666]) # not in this dataset for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow split by Lumi.

    Dataset policy with two lumis per slice must produce a single unit
    holding 2428 jobs.
    """
    lumiSplit = dict(SliceType='NumberOfLumis', SliceSize=2)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    workload.setStartPolicy('Dataset', **lumiSplit)
    dummyDataset = getFirstTask(workload).inputDataset()
    for task in workload.taskIterator():
        units, _ = Dataset(**lumiSplit)(workload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(2428, unit['Jobs'])
def testIgnore0SizeBlocks(self):
    """Ignore blocks with 0 files

    With zero files per block the Block policy must find no work and
    raise WorkQueueNoWorkError.
    """
    Globals.GlobalParams.setNumOfFilesPerBlock(0)
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setRunWhitelist([2, 3])
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs), Tier1ReRecoWorkload, task)
def atestLumiMask(self):
    """Lumi mask test

    NOTE(review): the leading 'a' in the method name keeps unittest
    discovery from running this test -- presumably disabled on purpose;
    confirm before re-enabling.
    """
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
        'ReRecoWorkload', rerecoArgs2)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                             inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    # Block blacklist
    lumiWorkload = factory.factoryWorkloadConstruction(
        'ReRecoWorkload', rerecoArgs2)
    task = getFirstTask(lumiWorkload)
    task.data.input.splitting.runs = ['1']
    task.data.input.splitting.lumis = ['1,1']
    units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(len(rejectedWork), 1)
def testIgnore0SizeBlocks(self):
    """Zero-file blocks yield no work.

    Forces every block to contain 0 files and verifies the Block policy
    raises WorkQueueNoWorkError for each task.
    """
    Globals.GlobalParams.setNumOfFilesPerBlock(0)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    emptyWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    emptyWorkload.setRunWhitelist([2, 3])
    dummyDataset = getFirstTask(emptyWorkload).inputDataset()
    for task in emptyWorkload.taskIterator():
        self.assertRaises(WorkQueueNoWorkError,
                          Block(**self.splitArgs), emptyWorkload, task)
def testRunWhitelist(self):
    """
    ReReco lumi split with Run whitelist
    This test may not do much of anything anymore since listRunLumis is not in DBS3

    Two runs whitelisted; expect two accepted units, 45 rejected blocks,
    no bad work, and two jobs overall.
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    Tier1ReRecoWorkload = rerecoWorkload(
        'ReRecoWorkload', rerecoArgs,
        assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
    Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, rejectedWork, badWork = Block(**splitArgs)(
            Tier1ReRecoWorkload, task)
        # Blocks 1 and 2 match run distribution
        self.assertEqual(2, len(units))
        self.assertEqual(len(rejectedWork), 45)
        self.assertEqual(len(badWork), 0)
        # Check number of jobs in element match number for
        # dataset in run whitelist
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            # listRunLumis does not work correctly with DBS3,
            # returning None for the # of lumis in a run.
            # NOTE: use next(iter(...)) instead of the Python-2-only
            # dict.keys()[0] subscript so this also runs on Python 3.
            runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                block=next(iter(unit['Inputs'])))
            for run in runLumis:
                if run in getFirstTask(
                        Tier1ReRecoWorkload).inputRunWhitelist():
                    # This is what it is with DBS3 unless we calculate it
                    self.assertEqual(runLumis[run], None)
        self.assertEqual(2, int(wq_jobs))
def testContinuousSplittingSupport(self):
    """Can modify successfully policies for continuous splitting

    First pass: a fresh Block policy emits one unit per block. Then a new
    policy instance told that all those blocks were already processed must
    find no work (twice, to confirm the modification is stable).
    """
    policyInstance = Block(**self.splitArgs)
    # continuous splitting must be advertised by the Block policy
    self.assertTrue(policyInstance.supportsWorkAddition(),
                    "Block instance should support continuous splitting")
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
        'ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                             inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = policyInstance(Tier1ReRecoWorkload, task)
        self.assertEqual(47, len(units))
        blocks = []  # fill with blocks as we get work units for them
        inputs = {}
        for unit in units:
            blocks.extend(unit['Inputs'].keys())
            inputs.update(unit['Inputs'])
            self.assertEqual(69, unit['Priority'])
            self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertTrue(1 <= unit['NumberOfLumis'])
            self.assertTrue(1 <= unit['NumberOfFiles'])
            self.assertTrue(0 <= unit['NumberOfEvents'])
        self.assertEqual(
            len(units),
            len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))

    # Modify the spec and task, get first a fresh policy instance
    policyInstance = Block(**self.splitArgs)
    for task in Tier1ReRecoWorkload.taskIterator():
        # declare every block already processed -> no new work expected
        policyInstance.modifyPolicyForWorkAddition(
            {'ProcessedInputs': inputs.keys()})
        self.assertRaises(WorkQueueNoWorkError, policyInstance,
                          Tier1ReRecoWorkload, task)

    # Run one last time
    policyInstance = Block(**self.splitArgs)
    for task in Tier1ReRecoWorkload.taskIterator():
        policyInstance.modifyPolicyForWorkAddition(
            {'ProcessedInputs': inputs.keys()})
        self.assertRaises(WorkQueueNoWorkError, policyInstance,
                          Tier1ReRecoWorkload, task)
    return
def testInvalidSpecs(self): """Specs with no work""" # no dataset rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) factory = ReRecoWorkloadFactory() processingSpec = factory.factoryWorkloadConstruction( 'testProcessingInvalid', rerecoArgs) getFirstTask(processingSpec).data.input.dataset = None for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task) # invalid dbs url processingSpec = factory.factoryWorkloadConstruction( 'testProcessingInvalid', rerecoArgs) getFirstTask( processingSpec ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com' for task in processingSpec.taskIterator(): self.assertRaises(DBSReaderError, Block(), processingSpec, task) # dataset non existent processingSpec = factory.factoryWorkloadConstruction( 'testProcessingInvalid', rerecoArgs) getFirstTask( processingSpec ).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO" for task in processingSpec.taskIterator(): self.assertRaises(DBSReaderError, Block(), processingSpec, task) # invalid run whitelist processingSpec = factory.factoryWorkloadConstruction( 'testProcessingInvalid', rerecoArgs) processingSpec.setRunWhitelist([666]) # not in this dataset for task in processingSpec.taskIterator(): self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task)
def testInvalidSpecs(self):
    """Specs with no work

    DQM harvesting variant: missing dataset, fake dataset name and an
    empty run whitelist must each raise the matching error from the
    Dataset policy.
    """
    dqmHarvArgs = getRequestArgs()
    # no dataset
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    factory = DQMHarvestWorkloadFactory()
    DQMHarvWorkload = factory.factoryWorkloadConstruction(
        'NoInputDatasetTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    getFirstTask(DQMHarvWorkload).data.input.dataset = None
    for task in DQMHarvWorkload.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), DQMHarvWorkload, task)

    # invalid dataset name
    DQMHarvWorkload = factory.factoryWorkloadConstruction(
        'InvalidInputDatasetTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    getFirstTask(DQMHarvWorkload).data.input.dataset.name = '/MinimumBias/FAKE-Filter-v1/RECO'
    for task in DQMHarvWorkload.taskIterator():
        self.assertRaises(DBSReaderError, Dataset(), DQMHarvWorkload, task)

    # invalid run whitelist
    DQMHarvWorkload = factory.factoryWorkloadConstruction(
        'InvalidRunNumberTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    DQMHarvWorkload.setRunWhitelist([666])  # not in this dataset
    for task in DQMHarvWorkload.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), DQMHarvWorkload, task)
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow split by Lumi

    Dataset policy with two lumis per slice: expect one unit with 4 jobs.
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow

    Dataset policy: one unit spanning the full dataset with fixed
    job/lumi/file/event counts.
    """
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            self.assertEqual(40, unit['NumberOfLumis'])
            self.assertEqual(20, unit['NumberOfFiles'])
            self.assertEqual(20000, unit['NumberOfEvents'])
def testLumiMask(self):
    """Lumi mask test

    One-lumi mask on run 180992: one accepted unit, 46 rejected blocks,
    no bad work.
    """
    rerecoArgs2 = {}
    rerecoArgs2.update(rerecoArgs)
    rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
    dummyWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2,
                                   assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    # Block blacklist
    lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2,
                                  assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    task = getFirstTask(lumiWorkload)
    # task.data.input.splitting.runs = ['1']
    task.data.input.splitting.runs = ['180992']
    task.data.input.splitting.lumis = ['1,1']
    units, rejectedWork, badWork = Block(**self.splitArgs)(lumiWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(len(rejectedWork), 46)
    self.assertEqual(len(badWork), 0)
def testBasicProductionWorkload(self): """Basic Production Workload""" # change split defaults for this test totalevents = 1000000 splitArgs = dict(SliceType='NumberOfEvents', SliceSize=100, MaxJobsPerElement=5) mcArgs["EventsPerJob"] = 100 BasicProductionWorkload = monteCarloWorkload('MonteCarloWorkload', mcArgs) getFirstTask(BasicProductionWorkload).setSiteWhitelist( ['T2_XX_SiteA', 'T2_XX_SiteB']) getFirstTask(BasicProductionWorkload).addProduction( totalEvents=totalevents) getFirstTask(BasicProductionWorkload).setSiteWhitelist( ['T2_XX_SiteA', 'T2_XX_SiteB']) for task in BasicProductionWorkload.taskIterator(): units, _, _ = MonteCarlo(**splitArgs)(BasicProductionWorkload, task) SliceSize = BasicProductionWorkload.startPolicyParameters( )['SliceSize'] self.assertEqual( math.ceil(totalevents / (SliceSize * splitArgs['MaxJobsPerElement'])), len(units)) first_event = 1 first_lumi = 1 first_run = 1 for unit in units: self.assertTrue(unit['Jobs'] <= splitArgs['MaxJobsPerElement']) self.assertEqual(unit['WMSpec'], BasicProductionWorkload) self.assertEqual(unit['Task'], task) self.assertEqual(unit['Mask']['FirstEvent'], first_event) self.assertEqual(unit['Mask']['FirstLumi'], first_lumi) last_event = first_event + (SliceSize * unit['Jobs']) - 1 if last_event > totalevents: # this should be the last unit of work last_event = totalevents last_lumi = first_lumi + unit['Jobs'] - 1 self.assertEqual(unit['Mask']['LastEvent'], last_event) self.assertEqual(unit['Mask']['LastLumi'], last_lumi) self.assertEqual(unit['Mask']['FirstRun'], first_run) self.assertEqual(last_lumi - first_lumi + 1, unit['NumberOfLumis']) self.assertEqual(last_event - first_event + 1, unit['NumberOfEvents']) first_event = last_event + 1 first_lumi += unit['Jobs'] # one lumi per job self.assertEqual(unit['Mask']['LastEvent'], totalevents)
def testGetMaskedBlocks(self):
    """
    _testGetMaskedBlocks_

    Check that getMaskedBlocks is returning the correct information:
    for every masked file, the lumi list must equal the intersection of
    the file's lumis with the task's lumi mask.
    """
    Globals.GlobalParams.setNumOfRunsPerFile(3)
    Globals.GlobalParams.setNumOfLumisPerBlock(5)
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
        'ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.data.request.priority = 69
    task = getFirstTask(Tier1ReRecoWorkload)
    inputDataset = task.inputDataset()
    inputDataset.primary = 'SingleElectron'
    inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
    inputDataset.tier = 'USER'
    task.data.input.splitting.runs = [166921, 166429, 166911]
    task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
    # the mask requested via the splitting parameters above
    lumiMask = LumiList(compactList={
        '166921': [[40, 70]],
        '166429': [[1, 50]],
        '166911': [[1, 5], [16, 20]],
    })
    # full lumi content of the input files (per run)
    inputLumis = LumiList(compactList={
        '166921': [[1, 67]],
        '166429': [[1, 91]],
        '166911': [[1, 104]],
    })
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                             inputDataset.tier)
    dbs = DBSReader(inputDataset.dbsurl)
    maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
    for block, files in maskedBlocks.items():
        # NOTE(review): "file" shadows the Python 2 builtin of the same name
        for file, lumiList in files.items():
            self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow

    Dataset policy variant: one unit covering the full dataset with the
    fixed job/lumi/file/event counts asserted below.
    """
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dummyDBS = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(15, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            self.assertEqual(4855, unit['NumberOfLumis'])
            self.assertEqual(72, unit['NumberOfFiles'])
            self.assertEqual(743201, unit['NumberOfEvents'])