def testPileupFetcherOnMC(self):
    """
    Build a TaskChain workload with both MC and data pileup configured,
    run the PileupFetcher over every task in the workload, and verify the
    pileup configuration file dropped into each task sandbox.
    """
    wfArgs = TaskChainWorkloadFactory.getTestArguments()
    wfArgs['Task1']["MCPileup"] = "/Cosmics/ComissioningHI-PromptReco-v1/RECO"
    wfArgs['Task1']["DataPileup"] = "/HighPileUp/Run2011A-v1/RAW"
    wfArgs['Task1']["ConfigCacheID"] = self.injectGenerationConfig()
    wfArgs["CouchDBName"] = "pileupfetcher_t"
    wfArgs["CouchURL"] = os.environ["COUCHURL"]

    wfFactory = TaskChainWorkloadFactory()
    workload = wfFactory.factoryWorkloadConstruction("TestWorkload", wfArgs)

    # now that the workload was created and args validated, we can add this PileupConfig
    wfArgs["PileupConfig"] = parsePileupConfig(wfArgs['Task1']["MCPileup"],
                                               wfArgs['Task1']["DataPileup"])

    # Since this is a test of the fetcher, loading from WMBS isn't really
    # necessary: the fetching happens before the workflow is inserted into
    # WMBS, so the workload instance is fed straight into the fetcher.
    puFetcher = PileupFetcher()
    sandboxCreator = SandboxCreator()
    sandboxRoot = "%s/%s" % (self.testDir, workload.name())
    for topTask in workload.taskIterator():
        for node in topTask.nodeIterator():
            # this mimics how SandboxCreator invokes the PileupFetcher
            helper = WMTask.WMTaskHelper(node)
            workDir = "%s/WMSandbox/%s" % (sandboxRoot, helper.name())
            puFetcher.setWorkingDirectory(workDir)
            # create the sandbox package for the fetcher to populate
            sandboxCreator._makePathonPackage(workDir)
            puFetcher(helper)
            self._queryPileUpConfigFile(wfArgs, helper, workDir)
def createMCWMSpec(self, name='MonteCarloWorkload'):
    """
    Build and return a Monte Carlo workload spec named *name*, backed by a
    config document injected into the test couch database.
    """
    specArgs = TaskChainWorkloadFactory.getTestArguments()
    specArgs["CouchDBName"] = rerecoArgs["CouchDBName"]
    specArgs["Task1"]["ConfigCacheID"] = createConfig(specArgs["CouchDBName"])

    spec = taskChainWorkload(name, specArgs)
    spec.setSpecUrl("/path/to/workload")
    # attach a production node generating a fixed number of events
    firstTask = getFirstTask(spec)
    firstTask.addProduction(totalevents=10000)
    return spec
def getProdArgs():
    """
    Return the TaskChain test arguments with CouchURL/CouchDBName blanked
    out and any ConfigCacheDoc entry removed.
    """
    mcArgs = TaskChainWorkloadFactory.getTestArguments()
    mcArgs.update({"CouchURL": None,
                   "CouchDBName": None})
    # The original inserted 'ConfigCacheDoc': None only to pop it right
    # away; popping with a default achieves the same guaranteed-absent
    # result without the dead insert.
    mcArgs.pop('ConfigCacheDoc', None)
    return mcArgs
def buildMultithreadedTaskChain(self, filename):
    """
    Build a TaskChain from several sources and customization.

    Combines the factory test arguments, the request JSON stored in
    *filename*, and the config-cache documents created for this test, and
    returns the merged argument dictionary.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()

    # Read in the request; use a context manager so the file handle is
    # closed (the original json.load(open(...)) leaked it)
    with open(filename) as requestFile:
        request = json.load(requestFile)

    # Construct args from the pieces starting with test args ...
    arguments = testArguments

    # ... continuing with the request
    for key in ["CMSSWVersion", "ScramArch", "GlobalTag", "ProcessingVersion",
                "Multicore", "Memory", "TaskChain", "Task1", "Task2", "Task3"]:
        arguments.update({key: request["createRequest"][key]})

    for key in ["SiteBlacklist"]:
        arguments.update({key: request["assignRequest"][key]})

    # ... then some local overrides
    del arguments["ConfigCacheID"]
    del arguments["ConfigCacheUrl"]
    arguments.update({"CouchURL": self.testInit.couchUrl,
                      "CouchDBName": self.testInit.couchDbName})

    # ... now fill in the ConfigCache documents created and override the inputs to link them up
    arguments["Task1"]["ConfigCacheID"] = processorDocs["DigiHLT"]
    arguments["Task2"]["ConfigCacheID"] = processorDocs["Reco"]
    arguments["Task2"]["InputFromOutputModule"] = "writeRAWDIGI"
    arguments["Task3"]["ConfigCacheID"] = processorDocs["ALCAReco"]
    arguments["Task3"]["InputFromOutputModule"] = "writeALCA"
    return arguments
def buildMultithreadedTaskChain(self, filename):
    """
    Build a TaskChain from several sources and customization.

    Combines the factory test arguments, the request JSON stored in
    *filename*, and the config-cache documents created for this test, and
    returns the merged argument dictionary.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()

    # Read in the request; use a context manager so the file handle is
    # closed (the original json.load(open(...)) leaked it)
    with open(filename) as requestFile:
        request = json.load(requestFile)

    # Construct args from the pieces starting with test args ...
    arguments = testArguments

    # ... continuing with the request
    for key in ['CMSSWVersion', 'ScramArch', 'GlobalTag', 'ProcessingVersion',
                'Multicore', 'Memory', 'TaskChain', 'Task1', 'Task2', 'Task3']:
        arguments.update({key: request['createRequest'][key]})

    for key in ['SiteBlacklist']:
        arguments.update({key: request['assignRequest'][key]})

    # ... then some local overrides
    del arguments['ConfigCacheID']
    del arguments['ConfigCacheUrl']
    arguments.update({
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
    })

    # ... now fill in the ConfigCache documents created and override the inputs to link them up
    arguments['Task1']['ConfigCacheID'] = processorDocs['DigiHLT']
    arguments['Task2']['ConfigCacheID'] = processorDocs['Reco']
    arguments['Task2']['InputFromOutputModule'] = 'writeRAWDIGI'
    arguments['Task3']['ConfigCacheID'] = processorDocs['ALCAReco']
    arguments['Task3']['InputFromOutputModule'] = 'writeALCA'
    return arguments
def testTrustFlags(self):
    """
    _testTrustFlags_

    Given a taskChain with 4 tasks, test whether TrustSitelists is set for
    the top level tasks and TrustPUSitelists is properly set to all tasks.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    chainArgs = TaskChainWorkloadFactory.getTestArguments()
    chainArgs.update(createMultiGTArgs())
    chainArgs["CouchURL"] = self.testInit.couchUrl
    chainArgs["CouchDBName"] = self.testInit.couchDbName
    chainArgs["Task1"]["ConfigCacheID"] = processorDocs['DigiHLT']
    chainArgs["Task2"]["ConfigCacheID"] = processorDocs['Reco']
    chainArgs["Task3"]["ConfigCacheID"] = processorDocs['ALCAReco']
    chainArgs["Task4"]["ConfigCacheID"] = processorDocs['Skims']

    chainFactory = TaskChainWorkloadFactory()
    workload = chainFactory.factoryWorkloadConstruction("YankingTheChain", chainArgs)

    # out of the box, both flags are off on every task
    for wmTask in workload.getAllTasks():
        siteFlags = wmTask.getTrustSitelists().values()
        self.assertEqual(siteFlags, [False, False])

    # set both flags to true now
    workload.setTrustLocationFlag(True, True)
    for wmTask in workload.getAllTasks():
        siteFlags = wmTask.getTrustSitelists()
        if wmTask.isTopOfTree():
            self.assertEqual(siteFlags.values(), [True, True])
        elif wmTask.taskType() in ["Merge", "Harvesting", "Cleanup", "LogCollect"]:
            self.assertEqual(siteFlags.values(), [False, False])
        else:
            self.assertFalse(siteFlags['trustlists'])
            self.assertTrue(siteFlags['trustPUlists'])

    # set both to false now
    workload.setTrustLocationFlag(False, False)
    for wmTask in workload.getAllTasks(cpuOnly=True):
        siteFlags = wmTask.getTrustSitelists().values()
        self.assertEqual(siteFlags, [False, False])
    return
def testGeneratorWorkflow(self):
    """
    _testGeneratorWorkflow_

    Test creating a request with an initial generator task.  It mocks a
    request where there are 2 similar paths starting from the generator,
    each one with a different PrimaryDataset, CMSSW configuration and
    processed dataset.  Dropping the RAW output as well.  Also include an
    ignored output module to keep things interesting...
    """
    generatorDoc = makeGeneratorConfig(self.configDatabase)
    processorDocs = makeProcessingConfigs(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "GR10_P_v4::All",
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 6,
        "IgnoredOutputModules": ["writeSkim2", "writeRAWDEBUGDIGI"],
        "Task1": {
            "TaskName": "GenSim",
            "ConfigCacheID": generatorDoc,
            "SplittingAlgo": "EventBased",
            "RequestNumEvents": 10000,
            "Seeding": "AutomaticSeeding",
            "PrimaryDataset": "RelValTTBar",
        },
        "Task2": {
            "TaskName": "DigiHLT_new",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "LumiBased",
            "CMSSWVersion": "CMSSW_5_2_6",
            "GlobalTag": "GR_39_P_V5:All",
            "PrimaryDataset": "PURelValTTBar",
            "KeepOutput": False
        },
        "Task3": {
            "TaskName": "DigiHLT_ref",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "EventBased",
            "CMSSWVersion": "CMSSW_5_2_7",
            "GlobalTag": "GR_40_P_V5:All",
            "AcquisitionEra": "ReleaseValidationNewConditions",
            "ProcessingVersion": 3,
            "ProcessingString": "Test",
            "KeepOutput": False
        },
        "Task4": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT_new",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "TransientOutputModules": ["writeRECO"]
        },
        "Task5": {
            "TaskName": "ALCAReco",
            "InputTask": "DigiHLT_ref",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "LumiBased",
        },
        "Task6": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "LumiBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments
    # NOTE: a leftover debug `print arguments` statement was removed here
    factory = TaskChainWorkloadFactory()

    # Test a malformed task chain definition
    arguments['Task4']['TransientOutputModules'].append('writeAOD')
    self.assertRaises(WMSpecFactoryException, factory.validateSchema, arguments)

    arguments['Task4']['TransientOutputModules'].remove('writeAOD')
    try:
        self.workload = factory.factoryWorkloadConstruction("PullingTheChain", arguments)
    except Exception as ex:
        # `except Exception, ex` fixed to the `as ex` form used elsewhere
        # in this file (and required on Python 3)
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        import traceback
        traceback.print_exc()
        self.fail(msg)
def testPileupTaskChain(self):
    """
    Build a two-task chain where Task1 uses MC pileup (with deterministic
    pileup enabled) and Task2 uses data pileup, then verify that each
    cmsRun step only carries its own pileup dataset and that the
    deterministic-pileup flag propagates into the job splitting
    parameters of Task1 only.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "GR10_P_v4::All",
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 2,
        "Task1": {
            "InputDataset": "/cosmics/whatever-input-v1/GEN-SIM",
            "TaskName": "DIGI",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "LumiBased",
            "LumisPerJob": 4,
            "MCPileup": "/some/cosmics-mc-v1/GEN-SIM",
            "DeterministicPileup": True,
            "CMSSWVersion": "CMSSW_5_2_6",
            "GlobalTag": "GR_39_P_V5:All",
            "PrimaryDataset": "PURelValTTBar",
            "AcquisitionEra": "CMSSW_5_2_6",
            "ProcessingString": "ProcStr_Task1"
        },
        "Task2": {
            "TaskName": "RECO",
            "InputTask": "DIGI",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "DataPileup": "/some/minbias-data-v1/GEN-SIM",
            "SplittingAlgo": "LumiBased",
            "LumisPerJob": 2,
            "GlobalTag": "GR_R_62_V3::All",
            "AcquisitionEra": "CMSSW_5_2_7",
            "ProcessingString": "ProcStr_Task2"
        },
    }
    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    self.workload = factory.factoryWorkloadConstruction("PullingTheChain", arguments)

    firstTask = self.workload.getTaskByPath("/PullingTheChain/DIGI")
    cmsRunStep = firstTask.getStep("cmsRun1").getTypeHelper()
    pileupData = cmsRunStep.getPileup()
    # Task1 configured MC pileup only: no "data" attribute expected
    self.assertFalse(hasattr(pileupData, "data"))
    self.assertEqual(pileupData.mc.dataset, ["/some/cosmics-mc-v1/GEN-SIM"])
    splitting = firstTask.jobSplittingParameters()
    self.assertTrue(splitting["deterministicPileup"])

    secondTask = self.workload.getTaskByPath("/PullingTheChain/DIGI/DIGIMergewriteRAWDIGI/RECO")
    cmsRunStep = secondTask.getStep("cmsRun1").getTypeHelper()
    pileupData = cmsRunStep.getPileup()
    # Task2 configured data pileup only: no "mc" attribute expected
    self.assertFalse(hasattr(pileupData, "mc"))
    self.assertEqual(pileupData.data.dataset, ["/some/minbias-data-v1/GEN-SIM"])
    splitting = secondTask.jobSplittingParameters()
    # DeterministicPileup was only set on Task1; Task2 must not inherit it
    self.assertFalse(splitting["deterministicPileup"])
def testMultipleGlobalTags(self):
    """
    _testMultipleGlobalTags_

    Test creating a workload that starts in a processing task
    with an input dataset, and has different globalTags
    and CMSSW versions (with corresponding scramArch) in each task
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    # global lumi mask plus a narrower per-task override for Task2
    lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
    lumiDict2 = {"1": [[2, 4], [8, 40]], "2": [[100, 150], [210, 210]]}
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "DefaultGlobalTag",
        "LumiList": lumiDict,
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 4,
        "Task1": {
            "TaskName": "DigiHLT",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "InputDataset": "/MinimumBias/Commissioning10-v4/GEN-SIM",
            "SplittingAlgo": "FileBased",
        },
        "Task2": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForReco",
            "CMSSWVersion": "CMSSW_3_1_2",
            "ScramArch": "CompatibleRECOArch",
            "PrimaryDataset": "ZeroBias",
            "LumiList": lumiDict2,
        },
        "Task3": {
            "TaskName": "ALCAReco",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeALCA",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForALCAReco",
            "CMSSWVersion": "CMSSW_3_1_3",
            "ScramArch": "CompatibleALCAArch",
        },
        "Task4": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "FileBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    try:
        self.workload = factory.factoryWorkloadConstruction("YankingTheChain", arguments)
    except Exception as ex:
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        self.fail(msg)

    testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # per-task argument propagation checks
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1'],
                    arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"),
                    arguments['Task2'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"),
                    arguments['Task3'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"),
                    arguments['Task4'], arguments)

    # top-level task inherits the request-level GT/CMSSW/arch and lumi mask
    digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
    self.assertEqual(lumiDict, digi.getLumiMask())
    digiStep = digi.getStepHelper("cmsRun1")
    self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])

    # Make sure this task has a different lumilist than the global one
    reco = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco")
    self.assertEqual(lumiDict2, reco.getLumiMask())
    recoStep = reco.getStepHelper("cmsRun1")
    self.assertEqual(recoStep.getGlobalTag(), arguments['Task2']['GlobalTag'])
    self.assertEqual(recoStep.getCMSSWVersion(), arguments['Task2']['CMSSWVersion'])
    self.assertEqual(recoStep.getScramArch(), arguments['Task2']['ScramArch'])

    alca = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco")
    self.assertEqual(lumiDict, alca.getLumiMask())
    alcaStep = alca.getStepHelper("cmsRun1")
    self.assertEqual(alcaStep.getGlobalTag(), arguments['Task3']['GlobalTag'])
    self.assertEqual(alcaStep.getCMSSWVersion(), arguments['Task3']['CMSSWVersion'])
    self.assertEqual(alcaStep.getScramArch(), arguments['Task3']['ScramArch'])

    # Task4 has no overrides, so it falls back to the request-level values
    skim = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims")
    skimStep = skim.getStepHelper("cmsRun1")
    self.assertEqual(skimStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(skimStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(skimStep.getScramArch(), arguments['ScramArch'])

    # Verify the output datasets
    outputDatasets = self.workload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 14, "Number of output datasets doesn't match")
    self.assertTrue("/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI" in outputDatasets,
                    "/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI not in output datasets")
    self.assertTrue("/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI" in outputDatasets,
                    "/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI not in output datasets")
    self.assertTrue("/ZeroBias/ReleaseValidation-reco-v1/RECO" in outputDatasets,
                    "/ZeroBias/ReleaseValidation-reco-v1/RECO not in output datasets")
    self.assertTrue("/ZeroBias/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
                    "/ZeroBias/ReleaseValidation-AOD-v1/AOD not in output datasets")
    self.assertTrue("/ZeroBias/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
                    "/ZeroBias/ReleaseValidation-alca-v1/ALCARECO not in output datasets")
    for i in range(1, 5):
        self.assertTrue("/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO" % i in outputDatasets,
                        "/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets" % i)
    for i in range(1, 6):
        self.assertTrue("/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD" % i in outputDatasets,
                        "/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets" % i)
    return
def testGeneratorWorkflow(self):
    """
    _testGeneratorWorkflow_

    Test creating a request with an initial generator task
    it mocks a request where there are 2 similar paths starting
    from the generator, each one with a different PrimaryDataset, CMSSW configuration
    and processed dataset. Dropping the RAW output as well.
    Also include an ignored output module to keep things interesting...
    """
    generatorDoc = makeGeneratorConfig(self.configDatabase)
    processorDocs = makeProcessingConfigs(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "GR10_P_v4::All",
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 6,
        "IgnoredOutputModules": ["writeSkim2", "writeRAWDEBUGDIGI"],
        "Task1": {
            "TaskName": "GenSim",
            "ConfigCacheID": generatorDoc,
            "SplittingAlgo": "EventBased",
            "RequestNumEvents": 10000,
            "Seeding": "AutomaticSeeding",
            "PrimaryDataset": "RelValTTBar",
        },
        "Task2": {
            "TaskName": "DigiHLT_new",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "LumiBased",
            "CMSSWVersion": "CMSSW_5_2_6",
            "GlobalTag": "GR_39_P_V5:All",
            "PrimaryDataset": "PURelValTTBar",
            "KeepOutput": False
        },
        "Task3": {
            "TaskName": "DigiHLT_ref",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "EventBased",
            "CMSSWVersion": "CMSSW_5_2_7",
            "GlobalTag": "GR_40_P_V5:All",
            "AcquisitionEra": "ReleaseValidationNewConditions",
            "ProcessingVersion": 3,
            "ProcessingString": "Test",
            "KeepOutput": False
        },
        "Task4": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT_new",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "TransientOutputModules": ["writeRECO"]
        },
        "Task5": {
            "TaskName": "ALCAReco",
            "InputTask": "DigiHLT_ref",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "LumiBased",
        },
        "Task6": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "LumiBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()

    # Test a malformed task chain definition
    arguments['Task4']['TransientOutputModules'].append('writeAOD')
    self.assertRaises(WMSpecFactoryException, factory.validateSchema, arguments)

    arguments['Task4']['TransientOutputModules'].remove('writeAOD')
    try:
        self.workload = factory.factoryWorkloadConstruction("PullingTheChain", arguments)
    except Exception as ex:
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        import traceback
        traceback.print_exc()
        self.fail(msg)

    testWMBSHelper = WMBSHelper(self.workload, "GenSim", "SomeBlock", cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # per-task argument propagation checks for all six tasks
    firstTask = self.workload.getTaskByPath("/PullingTheChain/GenSim")
    self._checkTask(firstTask, arguments['Task1'], arguments)
    self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new"),
                    arguments['Task2'], arguments)
    self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref"),
                    arguments['Task3'], arguments)
    self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco"),
                    arguments['Task4'], arguments)
    self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref/ALCAReco"),
                    arguments['Task5'], arguments)
    self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco/Skims"),
                    arguments['Task6'], arguments)

    # Verify the output datasets
    outputDatasets = self.workload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 11, "Number of output datasets doesn't match")
    self.assertTrue("/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM" in outputDatasets,
                    "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM not in output datasets")
    # RECO was declared transient, so it must NOT appear in the outputs
    self.assertFalse("/RelValTTBar/ReleaseValidation-reco-v1/RECO" in outputDatasets,
                     "/RelValTTBar/ReleaseValidation-reco-v1/RECO in output datasets")
    self.assertTrue("/RelValTTBar/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
                    "/RelValTTBar/ReleaseValidation-AOD-v1/AOD not in output datasets")
    self.assertTrue("/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
                    "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO not in output datasets")
    for i in range(1, 5):
        self.assertTrue("/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO" % i in outputDatasets,
                        "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets" % i)
    for i in range(1, 6):
        # writeSkim2 is in IgnoredOutputModules, so skim2 is skipped
        if i == 2:
            continue
        self.assertTrue("/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD" % i in outputDatasets,
                        "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets" % i)
    return
def testMultipleGlobalTags(self):
    """
    _testMultipleGlobalTags_

    Test creating a workload that starts in a processing task with an
    input dataset, and has different globalTags and CMSSW versions (with
    corresponding scramArch) in each task
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    # global lumi mask plus a narrower per-task override for Task2
    lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
    lumiDict2 = {"1": [[2, 4], [8, 40]], "2": [[100, 150], [210, 210]]}
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "DefaultGlobalTag",
        "LumiList": lumiDict,
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 4,
        "Task1": {
            "TaskName": "DigiHLT",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "InputDataset": "/MinimumBias/Commissioning10-v4/GEN-SIM",
            "SplittingAlgo": "FileBased",
        },
        "Task2": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForReco",
            "CMSSWVersion": "CMSSW_3_1_2",
            "ScramArch": "CompatibleRECOArch",
            "PrimaryDataset": "ZeroBias",
            "LumiList": lumiDict2,
        },
        "Task3": {
            "TaskName": "ALCAReco",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeALCA",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForALCAReco",
            "CMSSWVersion": "CMSSW_3_1_3",
            "ScramArch": "CompatibleALCAArch",
        },
        "Task4": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "FileBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    try:
        self.workload = factory.factoryWorkloadConstruction("YankingTheChain", arguments)
    except Exception as ex:
        # `except Exception, ex` fixed to the `as ex` form used elsewhere
        # in this file (and required on Python 3)
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        self.fail(msg)
def test1TaskMemCoresSettings(self):
    """
    _test1TaskMemCoresSettings_

    Make sure the multicore and memory settings are properly propagated to
    all steps. Single step in a task.
    """
    generatorDoc = makeGeneratorConfig(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "TaskChain": 1,
        "Task1": {
            "TaskName": "TaskOne",
            "ConfigCacheID": generatorDoc,
            "RequestNumEvents": 10000,
            "PrimaryDataset": "RelValTTBar",
        },
    }
    testArguments.update(arguments)

    factory = TaskChainWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestTaskChainWorkload", testArguments)

    taskPaths = ['/TestTaskChainWorkload/TaskOne',
                 '/TestTaskChainWorkload/TaskOne/LogCollectForTaskOne',
                 '/TestTaskChainWorkload/TaskOne/TaskOneMergewriteGENSIM',
                 '/TestTaskChainWorkload/TaskOne/TaskOneMergewriteGENSIM/TaskOnewriteGENSIMMergeLogCollect',
                 '/TestTaskChainWorkload/TaskOne/TaskOneCleanupUnmergedwriteGENSIM']

    # defaults: one core and the default memory everywhere
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        if taskObj.taskType() in ('Production', 'Processing'):
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)
        # BUGFIX: was `in ('LogCollect')` — `('LogCollect')` is a plain
        # string, so `in` performed a substring test; use equality (as the
        # other variant of this test does)
        elif taskObj.taskType() == 'LogCollect':
            stepHelper = taskObj.getStepHelper('logCollect1')
            self.assertEqual(stepHelper.getNumberOfCores(), 1)

    # now play with cores at top level
    testArguments['Multicore'] = 2
    testWorkload = factory.factoryWorkloadConstruction("TestTaskChainWorkload", testArguments)
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        if taskObj.taskType() in ('Production', 'Processing'):
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                # only the cmsRun step should pick up the multicore setting
                if step == 'cmsRun1':
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments['Multicore'])
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)
        elif taskObj.taskType() == 'LogCollect':
            stepHelper = taskObj.getStepHelper('logCollect1')
            self.assertEqual(stepHelper.getNumberOfCores(), 1)

    # last but not least, play with cores at task level
    testArguments['Task1']['Multicore'] = 2
    testArguments.pop('Multicore', None)
    testWorkload = factory.factoryWorkloadConstruction("TestTaskChainWorkload", testArguments)
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        if taskObj.taskType() in ('Production', 'Processing'):
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                if step == 'cmsRun1':
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments['Task1']['Multicore'])
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)
        elif taskObj.taskType() == 'LogCollect':
            stepHelper = taskObj.getStepHelper('logCollect1')
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
    return
def testMultipleGlobalTags(self):
    """
    _testMultipleGlobalTags_

    Test creating a workload that starts in a processing task
    with an input dataset, and has different globalTags
    and CMSSW versions (with corresponding scramArch) in each task
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    testArguments.update(createMultiGTArgs())
    # lumi mask applied to the top-level task only
    lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
    testArguments["CouchURL"] = self.testInit.couchUrl
    testArguments["CouchDBName"] = self.testInit.couchDbName
    testArguments["Task1"]["LumiList"] = lumiDict
    testArguments["Task1"]["ConfigCacheID"] = processorDocs['DigiHLT']
    testArguments["Task2"]["ConfigCacheID"] = processorDocs['Reco']
    testArguments["Task3"]["ConfigCacheID"] = processorDocs['ALCAReco']
    testArguments["Task4"]["ConfigCacheID"] = processorDocs['Skims']
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    self.workload = factory.factoryWorkloadConstruction("YankingTheChain", arguments)

    testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # per-task argument propagation checks
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1'],
                    arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"),
                    arguments['Task2'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"),
                    arguments['Task3'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"),
                    arguments['Task4'], arguments)

    # top-level task carries the request-level GT/CMSSW/arch and lumi mask
    digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
    self.assertEqual(lumiDict, digi.getLumiMask().getCompactList())
    digiStep = digi.getStepHelper("cmsRun1")
    self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])

    # Make sure this task has a different lumilist than the global one
    reco = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco")
    recoStep = reco.getStepHelper("cmsRun1")
    self.assertEqual(recoStep.getGlobalTag(), arguments['Task2']['GlobalTag'])
    self.assertEqual(recoStep.getCMSSWVersion(), arguments['Task2']['CMSSWVersion'])
    self.assertEqual(recoStep.getScramArch(), arguments['Task2']['ScramArch'])

    alca = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco")
    alcaStep = alca.getStepHelper("cmsRun1")
    self.assertEqual(alcaStep.getGlobalTag(), arguments['Task3']['GlobalTag'])
    self.assertEqual(alcaStep.getCMSSWVersion(), arguments['Task3']['CMSSWVersion'])
    self.assertEqual(alcaStep.getScramArch(), arguments['Task3']['ScramArch'])

    # Task4 has no overrides, so it falls back to the request-level values
    skim = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims")
    skimStep = skim.getStepHelper("cmsRun1")
    self.assertEqual(skimStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(skimStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(skimStep.getScramArch(), arguments['ScramArch'])

    # Verify the output datasets
    outputDatasets = self.workload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 14, "Number of output datasets doesn't match")
    self.assertTrue("/MinimumBias/ReleaseValidation-RawDigiFilter-FAKE-v1/RAW-DIGI" in outputDatasets)
    self.assertTrue("/MinimumBias/ReleaseValidation-RawDebugDigiFilter-FAKE-v1/RAW-DEBUG-DIGI" in outputDatasets)
    self.assertTrue("/ZeroBias/ReleaseValidation-reco-FAKE-v1/RECO" in outputDatasets)
    self.assertTrue("/ZeroBias/ReleaseValidation-AOD-FAKE-v1/AOD" in outputDatasets)
    self.assertTrue("/ZeroBias/ReleaseValidation-alca-FAKE-v1/ALCARECO" in outputDatasets)
    for i in range(1, 5):
        self.assertTrue("/MinimumBias/ReleaseValidation-alca%d-FAKE-v1/ALCARECO" % i in outputDatasets)
    for i in range(1, 6):
        self.assertTrue("/MinimumBias/ReleaseValidation-skim%d-FAKE-v1/RECO-AOD" % i in outputDatasets)
    return
def test1TaskMemCoresSettings(self):
    """
    _test1TaskMemCoresSettings_

    Make sure the multicore and memory settings are properly propagated to
    all steps. Single step in a task.

    Three phases: no multicore args (defaults), Multicore/EventStreams at
    request top level, then Multicore/EventStreams at Task1 level only.
    """
    generatorDoc = makeGeneratorConfig(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "ConfigCacheUrl": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "TaskChain": 1,
        "Task1": {
            "TaskName": "TaskOne",
            "ConfigCacheID": generatorDoc,
            "RequestNumEvents": 10000,
            "PrimaryDataset": "RelValTTBar",
        },
    }
    testArguments.update(arguments)

    factory = TaskChainWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestTaskChainWorkload", testArguments)

    # Every task the single-task chain is expected to spawn (merge, cleanup, log collect).
    taskPaths = ['/TestTaskChainWorkload/TaskOne',
                 '/TestTaskChainWorkload/TaskOne/LogCollectForTaskOne',
                 '/TestTaskChainWorkload/TaskOne/TaskOneMergewriteGENSIM',
                 '/TestTaskChainWorkload/TaskOne/TaskOneMergewriteGENSIM/TaskOnewriteGENSIMMergeLogCollect',
                 '/TestTaskChainWorkload/TaskOne/TaskOneCleanupUnmergedwriteGENSIM']

    # Phase 1: no Multicore/EventStreams given -> every step runs 1 core / 0 streams.
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        if taskObj.taskType() in ('Production', 'Processing'):
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)
        elif taskObj.taskType() == 'LogCollect':
            stepHelper = taskObj.getStepHelper('logCollect1')
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
            self.assertEqual(stepHelper.getNumberOfStreams(), 0)

    # now play with cores at top level
    # Phase 2: only the cmsRun step should pick up the top-level values;
    # stageOut/logArch/logCollect stay single-core.
    testArguments['Multicore'] = 2
    testArguments['EventStreams'] = 2
    testWorkload = factory.factoryWorkloadConstruction("TestTaskChainWorkload", testArguments)
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        if taskObj.taskType() in ('Production', 'Processing'):
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                if step == 'cmsRun1':
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments['Multicore'])
                    self.assertEqual(stepHelper.getNumberOfStreams(), testArguments["EventStreams"])
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)
        elif taskObj.taskType() == 'LogCollect':
            stepHelper = taskObj.getStepHelper('logCollect1')
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
            self.assertEqual(stepHelper.getNumberOfStreams(), 0)

    # last but not least, play with cores at task level
    # Phase 3: same expectations, but the values now come from Task1 itself
    # (top-level keys removed to prove the task-level ones are honored).
    testArguments['Task1']['Multicore'] = 2
    testArguments['Task1']['EventStreams'] = 2
    testArguments.pop('Multicore', None)
    testArguments.pop('EventStreams', None)
    testWorkload = factory.factoryWorkloadConstruction("TestTaskChainWorkload", testArguments)
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        if taskObj.taskType() in ('Production', 'Processing'):
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                if step == 'cmsRun1':
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments['Task1']['Multicore'])
                    self.assertEqual(stepHelper.getNumberOfStreams(), testArguments['Task1']['EventStreams'])
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)
        elif taskObj.taskType() == 'LogCollect':
            stepHelper = taskObj.getStepHelper('logCollect1')
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
            self.assertEqual(stepHelper.getNumberOfStreams(), 0)
    return
def testMultipleGlobalTags(self):
    """
    _testMultipleGlobalTags_

    Test creating a workload that starts in a processing task with an input
    dataset, and has different globalTags and CMSSW versions (with
    corresponding scramArch) in each task.

    NOTE(review): this file defines testMultipleGlobalTags more than once;
    later definitions shadow this one, so only the last is actually run.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()
    testArguments.update(createMultiGTArgs())
    lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
    testArguments["ConfigCacheUrl"] = self.testInit.couchUrl
    testArguments["CouchDBName"] = self.testInit.couchDbName
    # Task1 gets the lumi mask; per-task ConfigCacheIDs come from the couch docs.
    testArguments["Task1"]["LumiList"] = lumiDict
    testArguments["Task1"]["ConfigCacheID"] = processorDocs['DigiHLT']
    testArguments["Task2"]["ConfigCacheID"] = processorDocs['Reco']
    testArguments["Task3"]["ConfigCacheID"] = processorDocs['ALCAReco']
    testArguments["Task4"]["ConfigCacheID"] = processorDocs['Skims']
    arguments = testArguments
    factory = TaskChainWorkloadFactory()
    self.workload = factory.factoryWorkloadConstruction("YankingTheChain", arguments)

    # Feed the workload into WMBS to exercise subscription creation as well.
    testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Validate each task in the chain against its request arguments.
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"),
                    arguments['Task2'], arguments)
    self._checkTask(self.workload.getTaskByPath(
        "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"),
        arguments['Task3'], arguments)
    self._checkTask(self.workload.getTaskByPath(
        "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"),
        arguments['Task4'], arguments)

    # Top-level task: global GT/CMSSW/arch and the request-level lumi mask.
    digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
    self.assertEqual(lumiDict, digi.getLumiMask().getCompactList())
    digiStep = digi.getStepHelper("cmsRun1")
    self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])

    # Make sure this task has a different lumilist than the global one
    reco = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco")
    recoStep = reco.getStepHelper("cmsRun1")
    self.assertEqual(recoStep.getGlobalTag(), arguments['Task2']['GlobalTag'])
    self.assertEqual(recoStep.getCMSSWVersion(), arguments['Task2']['CMSSWVersion'])
    self.assertEqual(recoStep.getScramArch(), arguments['Task2']['ScramArch'])

    alca = self.workload.getTaskByPath(
        "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco")
    alcaStep = alca.getStepHelper("cmsRun1")
    self.assertEqual(alcaStep.getGlobalTag(), arguments['Task3']['GlobalTag'])
    self.assertEqual(alcaStep.getCMSSWVersion(), arguments['Task3']['CMSSWVersion'])
    self.assertEqual(alcaStep.getScramArch(), arguments['Task3']['ScramArch'])

    # Task4 defines no GT/CMSSW overrides, so it inherits the globals.
    skim = self.workload.getTaskByPath(
        "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims")
    skimStep = skim.getStepHelper("cmsRun1")
    self.assertEqual(skimStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(skimStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(skimStep.getScramArch(), arguments['ScramArch'])

    # Verify the output datasets
    outputDatasets = self.workload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 14, "Number of output datasets doesn't match")
    self.assertTrue("/BprimeJetToBZ_M800GeV_Tune4C_13TeV-madgraph-tauola/ReleaseValidation-RawDigiFilter-FAKE-v1/RAW-DIGI" in outputDatasets)
    self.assertTrue("/BprimeJetToBZ_M800GeV_Tune4C_13TeV-madgraph-tauola/ReleaseValidation-RawDebugDigiFilter-FAKE-v1/RAW-DEBUG-DIGI" in outputDatasets)
    self.assertTrue("/ZeroBias/ReleaseValidation-reco-FAKE-v1/RECO" in outputDatasets)
    self.assertTrue("/ZeroBias/ReleaseValidation-AOD-FAKE-v1/AOD" in outputDatasets)
    self.assertTrue("/ZeroBias/ReleaseValidation-alca-FAKE-v1/ALCARECO" in outputDatasets)
    for i in range(1, 5):
        self.assertTrue("/BprimeJetToBZ_M800GeV_Tune4C_13TeV-madgraph-tauola/ReleaseValidation-alca%d-FAKE-v1/ALCARECO" % i in outputDatasets)
    for i in range(1, 6):
        self.assertTrue("/BprimeJetToBZ_M800GeV_Tune4C_13TeV-madgraph-tauola/ReleaseValidation-skim%d-FAKE-v1/RECO-AOD" % i in outputDatasets)
    return
def testMultipleGlobalTags(self):
    """
    _testMultipleGlobalTags_

    Test creating a workload that starts in a processing task with an input
    dataset, and has different globalTags and CMSSW versions (with
    corresponding scramArch) in each task.

    Builds the full four-task request dictionary inline and asserts that
    workload construction does not raise.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()
    # Request-level lumi mask and a distinct per-task override for Task2.
    lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
    lumiDict2 = {"1": [[2, 4], [8, 40]], "2": [[100, 150], [210, 210]]}
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "DefaultGlobalTag",
        "LumiList": lumiDict,
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 4,
        "Task1": {
            "TaskName": "DigiHLT",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "InputDataset": "/MinimumBias/Commissioning10-v4/GEN-SIM",
            "SplittingAlgo": "FileBased",
        },
        "Task2": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForReco",
            "CMSSWVersion": "CMSSW_3_1_2",
            "ScramArch": "CompatibleRECOArch",
            "PrimaryDataset": "ZeroBias",
            "LumiList": lumiDict2,
        },
        "Task3": {
            "TaskName": "ALCAReco",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeALCA",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForALCAReco",
            "CMSSWVersion": "CMSSW_3_1_3",
            "ScramArch": "CompatibleALCAArch",
        },
        "Task4": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "FileBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments
    factory = TaskChainWorkloadFactory()
    try:
        self.workload = factory.factoryWorkloadConstruction("YankingTheChain", arguments)
    # FIX: was py2-only "except Exception, ex"; "as" works on py2.6+ and py3
    # and matches the style used elsewhere in this file.
    except Exception as ex:
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        self.fail(msg)
def testGeneratorWorkflow(self):
    """
    _testGeneratorWorkflow_
    Test creating a request with an initial generator task
    it mocks a request where there are 2 similar paths starting
    from the generator, each one with a different PrimaryDataset, CMSSW configuration
    and processed dataset. Dropping the RAW output as well.
    Also include an ignored output module to keep things interesting...
    """
    generatorDoc = makeGeneratorConfig(self.configDatabase)
    processorDocs = makeProcessingConfigs(self.configDatabase)

    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "GR10_P_v4::All",
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 6,
        "IgnoredOutputModules": ["writeSkim2", "writeRAWDEBUGDIGI"],
        "Task1": {
            "TaskName": "GenSim",
            "ConfigCacheID": generatorDoc,
            "SplittingAlgo": "EventBased",
            "RequestNumEvents": 10000,
            "Seeding": "AutomaticSeeding",
            "PrimaryDataset": "RelValTTBar",
        },
        "Task2": {
            "TaskName": "DigiHLT_new",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "LumiBased",
            "CMSSWVersion": "CMSSW_5_2_6",
            "GlobalTag": "GR_39_P_V5:All",
            "PrimaryDataset": "PURelValTTBar",
            "KeepOutput": False
        },
        "Task3": {
            "TaskName": "DigiHLT_ref",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "EventBased",
            "CMSSWVersion": "CMSSW_5_2_7",
            "GlobalTag": "GR_40_P_V5:All",
            "AcquisitionEra": "ReleaseValidationNewConditions",
            "ProcessingVersion": 3,
            "ProcessingString": "Test",
            "KeepOutput": False
        },
        "Task4": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT_new",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "TransientOutputModules": ["writeRECO"]
        },
        "Task5": {
            "TaskName": "ALCAReco",
            "InputTask": "DigiHLT_ref",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "LumiBased",
        },
        "Task6": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "LumiBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments
    # FIX: was py2-only "print arguments"; the call form works on both
    # Python 2 and 3. (Debug output, kept to preserve behavior.)
    print(arguments)
    factory = TaskChainWorkloadFactory()
    # Test a malformed task chain definition: a transient output module that
    # is consumed downstream (Task6 reads writeRECO) must fail validation.
    arguments['Task4']['TransientOutputModules'].append('writeAOD')
    self.assertRaises(WMSpecFactoryException, factory.validateSchema, arguments)
    arguments['Task4']['TransientOutputModules'].remove('writeAOD')
    try:
        self.workload = factory.factoryWorkloadConstruction("PullingTheChain", arguments)
    # FIX: was py2-only "except Exception, ex"; "as" works on py2.6+ and py3
    # and matches the style used elsewhere in this file.
    except Exception as ex:
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        import traceback
        traceback.print_exc()
        self.fail(msg)
def testPileupTaskChain(self):
    """
    Test a two-task chain where the first task uses MC pileup with
    deterministic pileup enabled and the second task uses data pileup.

    Asserts that each cmsRun step carries only its own pileup flavor
    (mc vs data) and that the deterministicPileup splitting flag is set
    only on the first task.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "GR10_P_v4::All",
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 2,
        "Task1": {
            "InputDataset": "/cosmics/whatever-input-v1/GEN-SIM",
            "TaskName": "DIGI",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "LumiBased",
            "LumisPerJob": 4,
            # MC pileup + deterministic mixing on the first task only.
            "MCPileup": "/some/cosmics-mc-v1/GEN-SIM",
            "DeterministicPileup": True,
            "CMSSWVersion": "CMSSW_5_2_6",
            "GlobalTag": "GR_39_P_V5:All",
            "PrimaryDataset": "PURelValTTBar",
            "AcquisitionEra": "CMSSW_5_2_6",
            "ProcessingString": "ProcStr_Task1"
        },
        "Task2": {
            "TaskName": "RECO",
            "InputTask": "DIGI",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            # Data pileup on the second task.
            "DataPileup": "/some/minbias-data-v1/GEN-SIM",
            "SplittingAlgo": "LumiBased",
            "LumisPerJob": 2,
            "GlobalTag": "GR_R_62_V3::All",
            "AcquisitionEra": "CMSSW_5_2_7",
            "ProcessingString": "ProcStr_Task2"
        },
    }

    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    self.workload = factory.factoryWorkloadConstruction("PullingTheChain", arguments)

    # First task: mc pileup only, no "data" section, deterministic splitting on.
    firstTask = self.workload.getTaskByPath("/PullingTheChain/DIGI")
    cmsRunStep = firstTask.getStep("cmsRun1").getTypeHelper()
    pileupData = cmsRunStep.getPileup()
    self.assertFalse(hasattr(pileupData, "data"))
    self.assertEqual(pileupData.mc.dataset, ["/some/cosmics-mc-v1/GEN-SIM"])
    splitting = firstTask.jobSplittingParameters()
    self.assertTrue(splitting["deterministicPileup"])

    # Second task: data pileup only, no "mc" section, deterministic splitting off.
    secondTask = self.workload.getTaskByPath("/PullingTheChain/DIGI/DIGIMergewriteRAWDIGI/RECO")
    cmsRunStep = secondTask.getStep("cmsRun1").getTypeHelper()
    pileupData = cmsRunStep.getPileup()
    self.assertFalse(hasattr(pileupData, "mc"))
    self.assertEqual(pileupData.data.dataset, ["/some/minbias-data-v1/GEN-SIM"])
    splitting = secondTask.jobSplittingParameters()
    self.assertFalse(splitting["deterministicPileup"])
def testGeneratorWorkflow(self):
    """
    _testGeneratorWorkflow_
    Test creating a request with an initial generator task
    it mocks a request where there are 2 similar paths starting
    from the generator, each one with a different PrimaryDataset, CMSSW configuration
    and processed dataset. Dropping the RAW output as well.
    Also include an ignored output module to keep things interesting...

    NOTE(review): this file defines testGeneratorWorkflow more than once;
    the later definition shadows the earlier one.
    """
    generatorDoc = makeGeneratorConfig(self.configDatabase)
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "GR10_P_v4::All",
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 6,
        "IgnoredOutputModules": ["writeSkim2", "writeRAWDEBUGDIGI"],
        "Task1": {
            "TaskName": "GenSim",
            "ConfigCacheID": generatorDoc,
            "SplittingAlgo": "EventBased",
            "RequestNumEvents": 10000,
            "Seeding": "AutomaticSeeding",
            "PrimaryDataset": "RelValTTBar",
        },
        "Task2": {
            "TaskName": "DigiHLT_new",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "LumiBased",
            "CMSSWVersion": "CMSSW_5_2_6",
            "GlobalTag": "GR_39_P_V5:All",
            "PrimaryDataset": "PURelValTTBar",
            "KeepOutput": False
        },
        "Task3": {
            "TaskName": "DigiHLT_ref",
            "InputTask": "GenSim",
            "InputFromOutputModule": "writeGENSIM",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "SplittingAlgo": "EventBased",
            "CMSSWVersion": "CMSSW_5_2_7",
            "GlobalTag": "GR_40_P_V5:All",
            "AcquisitionEra": "ReleaseValidationNewConditions",
            "ProcessingVersion": 3,
            "ProcessingString": "Test",
            "KeepOutput": False
        },
        "Task4": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT_new",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "TransientOutputModules": ["writeRECO"]
        },
        "Task5": {
            "TaskName": "ALCAReco",
            "InputTask": "DigiHLT_ref",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "LumiBased",
        },
        "Task6": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "LumiBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    # Test a malformed task chain definition: a transient output module that
    # is consumed downstream must be rejected by schema validation.
    arguments['Task4']['TransientOutputModules'].append('writeAOD')
    self.assertRaises(WMSpecFactoryException, factory.validateSchema, arguments)
    arguments['Task4']['TransientOutputModules'].remove('writeAOD')

    try:
        self.workload = factory.factoryWorkloadConstruction("PullingTheChain", arguments)
    except Exception as ex:
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        import traceback
        traceback.print_exc()
        self.fail(msg)

    # Feed the workload into WMBS to exercise subscription creation as well.
    testWMBSHelper = WMBSHelper(self.workload, "GenSim", "SomeBlock", cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Validate every task in both branches of the chain.
    firstTask = self.workload.getTaskByPath("/PullingTheChain/GenSim")
    self._checkTask(firstTask, arguments['Task1'], arguments)
    self._checkTask(
        self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new"),
        arguments['Task2'], arguments)
    self._checkTask(
        self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref"),
        arguments['Task3'], arguments)
    self._checkTask(
        self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco"),
        arguments['Task4'], arguments)
    self._checkTask(
        self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref/ALCAReco"),
        arguments['Task5'], arguments)
    self._checkTask(
        self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco/Skims"),
        arguments['Task6'], arguments)

    # Verify the output datasets
    outputDatasets = self.workload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 11, "Number of output datasets doesn't match")
    self.assertTrue(
        "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM" in outputDatasets,
        "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM not in output datasets")
    self.assertFalse(
        "/RelValTTBar/ReleaseValidation-reco-v1/RECO" in outputDatasets,
        "/RelValTTBar/ReleaseValidation-reco-v1/RECO in output datasets")
    self.assertTrue(
        "/RelValTTBar/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
        "/RelValTTBar/ReleaseValidation-AOD-v1/AOD not in output datasets")
    self.assertTrue(
        "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
        "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO not in output datasets")
    for i in range(1, 5):
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO" % i in outputDatasets,
            "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets" % i)
    for i in range(1, 6):
        # skim2 is in IgnoredOutputModules, so its dataset must not appear.
        if i == 2:
            continue
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD" % i in outputDatasets,
            "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets" % i)
    return
def testMultipleGlobalTags(self):
    """
    _testMultipleGlobalTags_

    Test creating a workload that starts in a processing task with an input
    dataset, and has different globalTags and CMSSW versions (with
    corresponding scramArch) in each task.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()
    # Request-level lumi mask and a distinct per-task override for Task2.
    lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
    lumiDict2 = {"1": [[2, 4], [8, 40]], "2": [[100, 150], [210, 210]]}
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "DefaultGlobalTag",
        "LumiList": lumiDict,
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 4,
        "Task1": {
            "TaskName": "DigiHLT",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "InputDataset": "/MinimumBias/Commissioning10-v4/GEN-SIM",
            "SplittingAlgo": "FileBased",
        },
        "Task2": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForReco",
            "CMSSWVersion": "CMSSW_3_1_2",
            "ScramArch": "CompatibleRECOArch",
            "PrimaryDataset": "ZeroBias",
            "LumiList": lumiDict2,
        },
        "Task3": {
            "TaskName": "ALCAReco",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeALCA",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForALCAReco",
            "CMSSWVersion": "CMSSW_3_1_3",
            "ScramArch": "CompatibleALCAArch",
        },
        "Task4": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "FileBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    try:
        self.workload = factory.factoryWorkloadConstruction("YankingTheChain", arguments)
    except Exception as ex:
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        self.fail(msg)

    # Feed the workload into WMBS to exercise subscription creation as well.
    testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Validate each task in the chain against its request arguments.
    self._checkTask(
        self.workload.getTaskByPath("/YankingTheChain/DigiHLT"),
        arguments['Task1'], arguments)
    self._checkTask(
        self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"),
        arguments['Task2'], arguments)
    self._checkTask(
        self.workload.getTaskByPath(
            "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"),
        arguments['Task3'], arguments)
    self._checkTask(
        self.workload.getTaskByPath(
            "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"),
        arguments['Task4'], arguments)

    # Top-level task: global GT/CMSSW/arch and the request-level lumi mask.
    # NOTE(review): this variant compares lumiDict against getLumiMask()
    # directly, while another variant in this file uses
    # getLumiMask().getCompactList() — confirm which return type this
    # WMCore version produces.
    digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
    self.assertEqual(lumiDict, digi.getLumiMask())
    digiStep = digi.getStepHelper("cmsRun1")
    self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])

    # Make sure this task has a different lumilist than the global one
    reco = self.workload.getTaskByPath(
        "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco")
    self.assertEqual(lumiDict2, reco.getLumiMask())
    recoStep = reco.getStepHelper("cmsRun1")
    self.assertEqual(recoStep.getGlobalTag(), arguments['Task2']['GlobalTag'])
    self.assertEqual(recoStep.getCMSSWVersion(), arguments['Task2']['CMSSWVersion'])
    self.assertEqual(recoStep.getScramArch(), arguments['Task2']['ScramArch'])

    # Task3 defines no LumiList of its own, so it carries the request-level mask.
    alca = self.workload.getTaskByPath(
        "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco")
    self.assertEqual(lumiDict, alca.getLumiMask())
    alcaStep = alca.getStepHelper("cmsRun1")
    self.assertEqual(alcaStep.getGlobalTag(), arguments['Task3']['GlobalTag'])
    self.assertEqual(alcaStep.getCMSSWVersion(), arguments['Task3']['CMSSWVersion'])
    self.assertEqual(alcaStep.getScramArch(), arguments['Task3']['ScramArch'])

    # Task4 defines no GT/CMSSW overrides, so it inherits the globals.
    skim = self.workload.getTaskByPath(
        "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims")
    skimStep = skim.getStepHelper("cmsRun1")
    self.assertEqual(skimStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(skimStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(skimStep.getScramArch(), arguments['ScramArch'])

    # Verify the output datasets
    outputDatasets = self.workload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 14, "Number of output datasets doesn't match")
    self.assertTrue(
        "/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI" in outputDatasets,
        "/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI not in output datasets")
    self.assertTrue(
        "/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI" in outputDatasets,
        "/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI not in output datasets")
    self.assertTrue(
        "/ZeroBias/ReleaseValidation-reco-v1/RECO" in outputDatasets,
        "/ZeroBias/ReleaseValidation-reco-v1/RECO not in output datasets")
    self.assertTrue(
        "/ZeroBias/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
        "/ZeroBias/ReleaseValidation-AOD-v1/AOD not in output datasets")
    self.assertTrue(
        "/ZeroBias/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
        "/ZeroBias/ReleaseValidation-alca-v1/ALCARECO not in output datasets")
    for i in range(1, 5):
        self.assertTrue(
            "/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO" % i in outputDatasets,
            "/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets" % i)
    for i in range(1, 6):
        self.assertTrue(
            "/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD" % i in outputDatasets,
            "/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets" % i)
    return