def testInvalidSpecs(self):
    """Specs with no work"""
    # A spec whose input dataset has been removed must raise a spec error.
    processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(processingSpec).data.input.dataset = None
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

    # A DBS url with an unsupported protocol is also a spec error.
    processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

    # A dataset name unknown to DBS produces no work.
    processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)

    # A run whitelist matching nothing in the dataset produces no work.
    processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
    processingSpec.setRunWhitelist([666])  # not in this dataset
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)
def testInvalidSpecs(self):
    """Specs with no work"""
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()

    # Missing input dataset -> spec error.
    processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(processingSpec).data.input.dataset = None
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

    # Bad DBS url protocol -> spec error.
    processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

    # Dataset unknown to DBS -> no work.
    processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)

    # Run whitelist with no overlap -> no work.
    processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
    processingSpec.setRunWhitelist([666])  # not in this dataset
    for task in processingSpec.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)
def testInvalidSpecs(self):
    """Specs with no work"""
    dqmHarvArgs = getRequestArgs()
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    factory = DQMHarvestWorkloadFactory()

    # Missing input dataset -> spec error.
    DQMHarvWorkload = factory.factoryWorkloadConstruction('NoInputDatasetTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    getFirstTask(DQMHarvWorkload).data.input.dataset = None
    for task in DQMHarvWorkload.taskIterator():
        self.assertRaises(WorkQueueWMSpecError, Dataset(), DQMHarvWorkload, task)

    # Dataset name that DBS does not know -> DBS reader error.
    DQMHarvWorkload = factory.factoryWorkloadConstruction('InvalidInputDatasetTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    getFirstTask(DQMHarvWorkload).data.input.dataset.name = '/MinimumBias/FAKE-Filter-v1/RECO'
    for task in DQMHarvWorkload.taskIterator():
        self.assertRaises(DBSReaderError, Dataset(), DQMHarvWorkload, task)

    # Run whitelist with no overlap -> no work.
    DQMHarvWorkload = factory.factoryWorkloadConstruction('InvalidRunNumberTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    DQMHarvWorkload.setRunWhitelist([666])  # not in this dataset
    for task in DQMHarvWorkload.taskIterator():
        self.assertRaises(WorkQueueNoWorkError, Dataset(), DQMHarvWorkload, task)
def testDataDirectiveFromQueue(self):
    """Test data directive from queue"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        # A directive naming the spec's own dataset is accepted...
        Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset: []})
        # ...while one naming a different dataset must be refused.
        self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                          Tier1ReRecoWorkload, task, dbs, {dataset + '1': []})
def testDataDirectiveFromQueue(self):
    """Test data directive from queue"""
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        # Directive for the spec's own dataset is honoured...
        Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset: []})
        # ...a directive for any other dataset is an error.
        self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                          Tier1ReRecoWorkload, task, dbs, {dataset + '1': []})
def testDatasetLumiMask(self):
    """
    Test NumberOfRuns splitting type with lumi mask
    """
    dqmHarvArgs = getRequestArgs()
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    # Restrict the workload to two runs via an explicit lumi mask.
    dqmHarvArgs["LumiList"] = {"181358": [[71, 80], [95, 110]], "181151": [[1, 20]]}
    factory = DQMHarvestWorkloadFactory()
    DQMHarvWorkload = factory.factoryWorkloadConstruction('DQMHarvestTest', dqmHarvArgs)
    splitArgs = DQMHarvWorkload.startPolicyParameters()
    inputDataset = getFirstTask(DQMHarvWorkload).getInputDatasetPath()
    for task in DQMHarvWorkload.taskIterator():
        units, _ = Dataset(**splitArgs)(DQMHarvWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            # One job per masked run.
            self.assertEqual(2, unit['Jobs'])
            self.assertEqual(DQMHarvWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            self.assertEqual(46, unit['NumberOfLumis'])
            self.assertEqual(4, unit['NumberOfFiles'])
            self.assertEqual(12342, unit['NumberOfEvents'])
def testDatasetRunWhitelist(self):
    """
    Test NumberOfRuns splitting with run white list
    """
    dqmHarvArgs = getRequestArgs()
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    # Limit the workload to four known runs.
    dqmHarvArgs["RunWhitelist"] = [181358, 181417, 180992, 181151]
    factory = DQMHarvestWorkloadFactory()
    DQMHarvWorkload = factory.factoryWorkloadConstruction('DQMHarvestTest', dqmHarvArgs)
    splitArgs = DQMHarvWorkload.startPolicyParameters()
    inputDataset = getFirstTask(DQMHarvWorkload).getInputDatasetPath()
    for task in DQMHarvWorkload.taskIterator():
        units, _ = Dataset(**splitArgs)(DQMHarvWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            # One job per whitelisted run.
            self.assertEqual(4, unit['Jobs'])
            self.assertEqual(DQMHarvWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            self.assertEqual(217, unit['NumberOfLumis'])
            self.assertEqual(8, unit['NumberOfFiles'])
            self.assertEqual(83444, unit['NumberOfEvents'])
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    parentProcArgs = getReRecoArgs(parent=True)
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    # This dataset does have parents. Adding it here to keep the test going.
    # It seems like "dbs" below is never used
    parentProcArgs2 = dict(parentProcArgs)
    parentProcArgs2.update({
        'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'
    })
    parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
    parentProcSpec.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(parentProcSpec).getInputDatasetPath()
    for task in parentProcSpec.taskIterator():
        units, _ = Dataset(**splitArgs)(parentProcSpec, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(847, unit['Jobs'])
            self.assertEqual(1694, unit['NumberOfLumis'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            # Parentage is requested but no parent data is attached yet.
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testDatasetSingleJob(self):
    """
    Test NumberOfRuns splitting type with very large SliceSize
    """
    dqmHarvArgs = getRequestArgs()
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    # multiRun harvesting collapses everything into a single job.
    dqmHarvArgs["DQMHarvestUnit"] = 'multiRun'
    factory = DQMHarvestWorkloadFactory()
    DQMHarvWorkload = factory.factoryWorkloadConstruction('DQMHarvestTest', dqmHarvArgs)
    splitArgs = DQMHarvWorkload.startPolicyParameters()
    inputDataset = getFirstTask(DQMHarvWorkload).getInputDatasetPath()
    for task in DQMHarvWorkload.taskIterator():
        units, _ = Dataset(**splitArgs)(DQMHarvWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(1, unit['Jobs'])
            self.assertEqual(DQMHarvWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            self.assertEqual(4855, unit['NumberOfLumis'])
            self.assertEqual(72, unit['NumberOfFiles'])
            self.assertEqual(743201, unit['NumberOfEvents'])
def testRunWhitelist(self):
    """ReReco lumi split with Run whitelist"""
    # Configure the mock DBS so files span multiple runs.
    Globals.GlobalParams.setNumOfRunsPerFile(2)
    # A large number of lumis to ensure we get multiple runs.
    Globals.GlobalParams.setNumOfLumisPerBlock(10)
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setRunWhitelist([2, 3])
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        # Check number of jobs in element match number for
        # dataset in run whitelist
        jobs = 0
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
            for run in runLumis:
                if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                    jobs += runLumis[run]
        self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
def testDatasetPolicy(self):
    """
    Test ordinary NumberOfRuns splitting
    """
    dqmHarvArgs = getRequestArgs()
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    factory = DQMHarvestWorkloadFactory()
    DQMHarvWorkload = factory.factoryWorkloadConstruction('DQMHarvestTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    splitArgs = DQMHarvWorkload.startPolicyParameters()
    inputDataset = getFirstTask(DQMHarvWorkload).getInputDatasetPath()
    for task in DQMHarvWorkload.taskIterator():
        units, _, _ = Dataset(**splitArgs)(DQMHarvWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            # One job per run in the dataset.
            self.assertEqual(47, unit['Jobs'])
            self.assertEqual(DQMHarvWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(list(unit['Inputs']), [inputDataset])
            self.assertEqual(4855, unit['NumberOfLumis'])
            self.assertEqual(72, unit['NumberOfFiles'])
            self.assertEqual(743201, unit['NumberOfEvents'])
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    parentProcArgs = getReRecoArgs(parent=True)
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    # This dataset does have parents. Adding it here to keep the test going.
    # It seems like "dbs" below is never used
    parentProcArgs2 = {}
    parentProcArgs2.update(parentProcArgs)
    parentProcArgs2.update({'InputDataset': '/Cosmics/ComissioningHI-PromptReco-v1/RECO'})
    parentProcSpec = rerecoWorkload('ReRecoWorkload', parentProcArgs2,
                                    assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    parentProcSpec.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(parentProcSpec).getInputDatasetPath()
    for task in parentProcSpec.taskIterator():
        units, _, _ = Dataset(**splitArgs)(parentProcSpec, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(3993, unit['Jobs'])
            self.assertEqual(7985, unit['NumberOfLumis'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            # Parent flag is set but no parent data attached at this stage.
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    # NOTE: this previously read rerecoArgs["CouchDBName"]; use the parent
    # args dict itself, consistent with the other parent-processing tests.
    parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    # This dataset does have parents. Adding it here to keep the test going. It seems like "dbs" below is never used
    parentProcArgs2 = {}
    parentProcArgs2.update(parentProcArgs)
    parentProcArgs2.update({'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'})
    parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
    parentProcSpec.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(parentProcSpec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dummyDBS = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in parentProcSpec.taskIterator():
        units, _ = Dataset(**self.splitArgs)(parentProcSpec, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(64, unit['Jobs'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            # Parent flag is set but no parent data attached at this stage.
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testRunWhitelist(self):
    """
    ReReco lumi split with Run whitelist
    This test may not do much of anything anymore since listRunLumis is not in DBS3
    """
    # Configure the mock DBS so files span multiple runs.
    Globals.GlobalParams.setNumOfRunsPerFile(2)
    # a large number of lumis to ensure we get multiple runs
    Globals.GlobalParams.setNumOfLumisPerBlock(10)
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setRunWhitelist([181061, 181175])
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        # Check number of jobs in element match number for
        # dataset in run whitelist
        wq_jobs = 0
        for unit in units:
            wq_jobs += unit['Jobs']
            runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
            for run in runLumis:
                if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                    # This is what it is with DBS3 unless we calculate it
                    self.assertEqual(runLumis[run], None)
        self.assertEqual(250, int(wq_jobs))
def testDataDirectiveFromQueue(self):
    """Test data directive from queue"""
    dqmHarvArgs = getRequestArgs()
    dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
    factory = DQMHarvestWorkloadFactory()
    DQMHarvWorkload = factory.factoryWorkloadConstruction('DQMHarvestTest', dqmHarvArgs)
    DQMHarvWorkload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
    splitArgs = DQMHarvWorkload.startPolicyParameters()
    for task in DQMHarvWorkload.taskIterator():
        # A directive for a dataset the spec does not use must be refused.
        self.assertRaises(RuntimeError, Dataset(**splitArgs),
                          DQMHarvWorkload, task, {NOT_EXIST_DATASET: []})
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow split by Lumi"""
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)

    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            # ceil(total lumis / SliceSize) jobs expected.
            self.assertEqual(2428, unit['Jobs'])
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow split by Lumi"""
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
def testLumiSplitTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow split by Lumi"""
    splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)

    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            self.assertEqual(40, unit['NumberOfLumis'])
            self.assertEqual(20, unit['NumberOfFiles'])
            self.assertEqual(20000, unit['NumberOfEvents'])
def testMultiTaskProcessingWorkload(self):
    """Multi Task Processing Workflow"""
    splitArgs = dict(SliceType='NumberOfFiles', SliceSize=5)
    # First pass: record each task's input dataset path.
    datasets = []
    tasks, count = 0, 0
    for task in MultiTaskProcessingWorkload.taskIterator():
        tasks += 1
        datasets.append(task.getInputDatasetPath())
    # Second pass: one work unit per task, keyed by that task's dataset.
    for task in MultiTaskProcessingWorkload.taskIterator():
        units, _, _ = Dataset(**splitArgs)(MultiTaskProcessingWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(22, unit['Jobs'])
            self.assertEqual(MultiTaskProcessingWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [datasets[count]])
        count += 1
    self.assertEqual(tasks, count)
def testMultiTaskProcessingWorkload(self):
    """Multi Task Processing Workflow"""
    # First pass: record each task's input dataset path.
    datasets = []
    tasks, count = 0, 0
    for task in MultiTaskProcessingWorkload.taskIterator():
        tasks += 1
        inputDataset = task.inputDataset()
        datasets.append("/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier))
    # Second pass: one work unit per task, keyed by that task's dataset.
    for task in MultiTaskProcessingWorkload.taskIterator():
        units, _ = Dataset(**self.splitArgs)(MultiTaskProcessingWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(22, unit['Jobs'])
            self.assertEqual(MultiTaskProcessingWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [datasets[count]])
        count += 1
    self.assertEqual(tasks, count)
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dummyDBS = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _ = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(15, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            self.assertEqual(4855, unit['NumberOfLumis'])
            self.assertEqual(72, unit['NumberOfFiles'])
            self.assertEqual(743201, unit['NumberOfEvents'])
def testTier1ReRecoWorkload(self):
    """Tier1 Re-reco workflow"""
    splitArgs = dict(SliceType='NumberOfFiles', SliceSize=5)

    rerecoArgs = getReRecoArgs()
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                         assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).getInputDatasetPath()
    for task in Tier1ReRecoWorkload.taskIterator():
        units, _, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(15, unit['Jobs'])
            self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [inputDataset])
            self.assertEqual(4855, unit['NumberOfLumis'])
            self.assertEqual(72, unit['NumberOfFiles'])
            self.assertEqual(743201, unit['NumberOfEvents'])
def testParentProcessing(self):
    """
    test parent processing: should have the same results as rereco test
    with the parent flag and dataset.
    """
    parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)
    parentProcSpec.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(parentProcSpec).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    for task in parentProcSpec.taskIterator():
        units = Dataset(**self.splitArgs)(parentProcSpec, task)
        self.assertEqual(1, len(units))
        for unit in units:
            self.assertEqual(4, unit['Jobs'])
            self.assertEqual(parentProcSpec, unit['WMSpec'])
            self.assertEqual(task, unit['Task'])
            self.assertEqual(unit['Inputs'].keys(), [dataset])
            # Parent flag is set but no parent data attached at this stage.
            self.assertEqual(True, unit['ParentFlag'])
            self.assertEqual(0, len(unit['ParentData']))
def testWhiteBlackLists(self):
    """Block/Run White/Black lists"""
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dummyDBS = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
    # Block suffixes (GUIDs) used to build block names for the lists below.
    white_list = "#5c53d062-0bed-11e1-b764-003048caaace"
    black_list = "#f29b82f0-0c50-11e1-b764-003048caaace"

    # Block blacklist: one block removed from the full dataset.
    rerecoArgs2 = dict(rerecoArgs)
    rerecoArgs2['BlockBlacklist'] = [dataset + black_list]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 15.0)
    self.assertEqual(4813, units[0]['NumberOfLumis'])
    self.assertEqual(71, units[0]['NumberOfFiles'])
    self.assertEqual(725849, units[0]['NumberOfEvents'])

    # Block Whitelist: only the single whitelisted block survives.
    rerecoArgs2['BlockWhitelist'] = [dataset + white_list]
    rerecoArgs2['BlockBlacklist'] = []
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 1.0)
    self.assertEqual(21, units[0]['NumberOfLumis'])
    self.assertEqual(1, units[0]['NumberOfFiles'])
    self.assertEqual(20176, units[0]['NumberOfEvents'])

    # Block Mixed Whitelist: whitelist and blacklist name different blocks,
    # so the whitelisted block is kept.
    rerecoArgs2['BlockWhitelist'] = [dataset + white_list]
    rerecoArgs2['BlockBlacklist'] = [dataset + black_list]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 1.0)
    self.assertEqual(21, units[0]['NumberOfLumis'])
    self.assertEqual(1, units[0]['NumberOfFiles'])
    self.assertEqual(20176, units[0]['NumberOfEvents'])

    # Run Whitelist: a single run.
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunWhitelist'] = [181061]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
    self.assertEqual(71, units[0]['NumberOfLumis'])
    self.assertEqual(1, units[0]['NumberOfFiles'])
    self.assertEqual(5694, units[0]['NumberOfEvents'])

    # Run Whitelist: two runs.
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunWhitelist'] = [181061, 181175]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
    self.assertEqual(250, units[0]['NumberOfLumis'])
    self.assertEqual(2, units[0]['NumberOfFiles'])
    self.assertEqual(13766, units[0]['NumberOfEvents'])

    # Run Blacklist: remove one run from the full dataset.
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunBlacklist'] = [181175]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 15.0)
    self.assertEqual(4676, units[0]['NumberOfLumis'])
    self.assertEqual(71, units[0]['NumberOfFiles'])
    self.assertEqual(735129, units[0]['NumberOfEvents'])

    # Run Mixed Whitelist: whitelist one run, blacklist a different one.
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunBlacklist'] = [181175]
    rerecoArgs3['RunWhitelist'] = [181061]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
    self.assertEqual(71, units[0]['NumberOfLumis'])
    self.assertEqual(1, units[0]['NumberOfFiles'])
    self.assertEqual(5694, units[0]['NumberOfEvents'])
def testWhiteBlackLists(self):
    """Block/Run White/Black lists"""
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

    # NOTE: the per-case override is set AFTER copying the base args.
    # The previous code built {'BlockBlacklist': ...} first and then called
    # update(rerecoArgs), which would silently clobber the override if the
    # base args ever carried the same key.

    # Block blacklist
    rerecoArgs2 = dict(rerecoArgs)
    rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(4, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Block Whitelist
    rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
    rerecoArgs2['BlockBlacklist'] = []
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(4, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Block Mixed Whitelist: whitelist one block, blacklist a different one.
    rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
    rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(4, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Run Whitelist: a single run.
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunWhitelist'] = [1]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(10, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Run Whitelist: two runs.
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunWhitelist'] = [1, 2]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 4.0)
    self.assertEqual(20, units[0]['NumberOfLumis'])
    self.assertEqual(20, units[0]['NumberOfFiles'])
    self.assertEqual(20000, units[0]['NumberOfEvents'])

    # Run Blacklist
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunBlacklist'] = [2]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(10, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Run Mixed Whitelist: blacklist one run, whitelist the other.
    rerecoArgs3 = dict(rerecoArgs)
    rerecoArgs3['RunBlacklist'] = [1]
    rerecoArgs3['RunWhitelist'] = [2]
    blacklistBlockWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units, _ = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(10, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])
def testWhiteBlackLists(self):
    """Block/Run White/Black lists

    Exercises the 'Dataset' start policy against every combination of
    block and run white/black lists, checking the unit count, job count,
    and the lumi/file/event totals the policy reports for each case.

    NOTE(review): this method builds specs via the rerecoWorkload()
    helper, while other tests in this file (e.g. testInvalidSpecs) use
    ReRecoWorkloadFactory().factoryWorkloadConstruction() — confirm which
    spec-construction API this version of the file expects.
    NOTE(review): the Dataset(...)(workload, task) result is bound to a
    single name here, but a sibling test unpacks it as `units, _ =` —
    confirm the policy's return arity before relying on this method.
    """
    Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
    Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
    inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
    # Canonical /primary/processed/tier dataset path used in the lists below
    dataset = "/%s/%s/%s" % (inputDataset.primary,
                             inputDataset.processed,
                             inputDataset.tier)
    # NOTE(review): `dbs` is never read in this method — candidate for removal
    dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

    # Block blacklist: drop block #1, expect the remaining work only
    rerecoArgs2 = {'BlockBlacklist': [dataset + '#1']}
    # update() fills in the common args; existing keys in rerecoArgs would
    # overwrite the list set above if present in rerecoArgs
    rerecoArgs2.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(20, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Block Whitelist: keep only block #1 (same totals as the blacklist
    # case above — the two blocks are symmetric in the test fixture)
    rerecoArgs2['BlockWhitelist'] = [dataset + '#1']
    rerecoArgs2['BlockBlacklist'] = []
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(20, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Block Mixed Whitelist: whitelist #2 while blacklisting #1
    rerecoArgs2['BlockWhitelist'] = [dataset + '#2']
    rerecoArgs2['BlockBlacklist'] = [dataset + '#1']
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(20, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Run Whitelist: only run 1
    rerecoArgs3 = {'RunWhitelist': [1]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 1.0)
    self.assertEqual(25, units[0]['NumberOfLumis'])
    self.assertEqual(5, units[0]['NumberOfFiles'])
    self.assertEqual(5000, units[0]['NumberOfEvents'])

    # Run Whitelist: runs 1 and 2 — totals triple relative to run 1 alone
    rerecoArgs3 = {'RunWhitelist': [1, 2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 3.0)
    self.assertEqual(75, units[0]['NumberOfLumis'])
    self.assertEqual(15, units[0]['NumberOfFiles'])
    self.assertEqual(15000, units[0]['NumberOfEvents'])

    # Run Blacklist: exclude run 2
    rerecoArgs3 = {'RunBlacklist': [2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(50, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])

    # Run Mixed Whitelist: blacklist run 1, whitelist run 2
    rerecoArgs3 = {'RunBlacklist': [1], 'RunWhitelist': [2]}
    rerecoArgs3.update(rerecoArgs)
    blacklistBlockWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs3)
    blacklistBlockWorkload.setStartPolicy('Dataset', **self.splitArgs)
    task = getFirstTask(blacklistBlockWorkload)
    units = Dataset(**self.splitArgs)(blacklistBlockWorkload, task)
    self.assertEqual(len(units), 1)
    self.assertEqual(units[0]['Inputs'].keys(), [dataset])
    self.assertEqual(units[0]['Jobs'], 2.0)
    self.assertEqual(50, units[0]['NumberOfLumis'])
    self.assertEqual(10, units[0]['NumberOfFiles'])
    self.assertEqual(10000, units[0]['NumberOfEvents'])