def testMonteCarlo(self):
    """
    Build a Monte Carlo workload, create its top level fileset and
    subscription through WMBSHelper, then run the shared Monte Carlo
    checks (self._commonMonteCarloTest).
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ConfigCacheID": self.injectMonteCarloConfig(),
    })

    workload = monteCarloWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    # Create the top level fileset and subscribe the top level task to it.
    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
def testSingleStepReDigi(self):
    """
    _testSingleStepReDigi_

    Build a ReDigi workload that only has a step one config (steps two
    and three are set to None), create its top level fileset and
    subscription, and run the verifyKeepAOD checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "redigi_t",
    })

    injectedConfigs = self.injectReDigiConfigs()
    workloadArgs.update({
        # The third injected config is used as the single step.
        "StepOneConfigCacheID": injectedConfigs[2],
        "StepTwoConfigCacheID": None,
        "StepThreeConfigCacheID": None,
    })

    workload = reDigiWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self.verifyKeepAOD()
def testThreeStepChainedReDigi(self):
    """
    _testThreeStepChainedReDigi_

    Build a three step chained ReDigi workload with the step one and
    step two outputs not kept, create its subscription through
    WMBSHelper and run the verifyKeepAOD checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "redigi_t",
    })

    injectedConfigs = self.injectReDigiConfigs()
    workloadArgs.update({
        "StepOneConfigCacheID": injectedConfigs[0],
        "StepTwoConfigCacheID": injectedConfigs[1],
        "StepThreeConfigCacheID": injectedConfigs[2],
        "KeepStepOneOutput": False,
        "KeepStepTwoOutput": False,
    })

    workload = reDigiWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    # This variant uses the two-argument helper and the argument-less
    # createSubscription() call.
    helper = WMBSHelper(workload, "SomeBlock")
    helper.createSubscription()

    self.verifyKeepAOD()
def testChainedReDigi(self):
    """
    _testChainedReDigi_

    Build a chained ReDigi workload whose step one output is not kept
    (RAW is discarded), create its top level fileset and subscription,
    and run the verifyDiscardRAW checks.

    Only the step one/step two information in WMBS is verified, as the
    step three information is the same as the dependent workflow.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "redigi_t",
    })

    injectedConfigs = self.injectReDigiConfigs()
    workloadArgs.update({
        "StepOneConfigCacheID": injectedConfigs[0],
        "StepTwoConfigCacheID": injectedConfigs[1],
        "StepThreeConfigCacheID": injectedConfigs[2],
        "KeepStepOneOutput": False,
    })

    workload = reDigiWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self.verifyDiscardRAW()
def testThreeStepChainedReDigi(self):
    """
    _testThreeStepChainedReDigi_

    Verify that a chained ReDigi workflow that discards RAW and RECO data
    can be created and installed into WMBS correctly.

    Before installing, check that the workload has exactly one top level
    task and that its cmsRun2 step lists exactly two output modules.
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"

    configs = self.injectReDigiConfigs()
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    defaultArguments["KeepStepOneOutput"] = False
    defaultArguments["KeepStepTwoOutput"] = False

    testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    # assertEqual (instead of assertTrue on a comparison) reports the
    # actual count when the check fails.
    topLevelTasks = testWorkload.getTopLevelTask()
    self.assertEqual(len(topLevelTasks), 1,
                     "Error: Wrong number of top level tasks.")

    topLevelTask = topLevelTasks[0]
    topLevelStep = topLevelTask.steps()
    cmsRun2Step = topLevelStep.getStep("cmsRun2").getTypeHelper()
    self.assertEqual(len(cmsRun2Step.listOutputModules()), 2,
                     "Error: Wrong number of output modules in cmsRun2.")

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    self.verifyKeepAOD()
    return
def testCombinedReDigiRecoConfig(self):
    """
    _testCombinedReDigiRecoConfig_

    Build a ReDigi workload from configs injected with
    combinedStepOne=True (two steps, no step three), create its top level
    fileset and subscription, and run the verifyDiscardRAW checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "redigi_t",
    })

    injectedConfigs = self.injectReDigiConfigs(combinedStepOne=True)
    workloadArgs.update({
        "StepOneConfigCacheID": injectedConfigs[0],
        "StepTwoConfigCacheID": injectedConfigs[2],
        "StepThreeConfigCacheID": None,
        "StepOneOutputModuleName": "RECODEBUGoutput",
    })

    workload = reDigiWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self.verifyDiscardRAW()
def testRelValMCWithPileup(self):
    """
    Build a RelValMC workload that carries a pileup configuration,
    create its top level fileset and subscription, and run the
    generation, step one and step two task checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "relvalmc_t",
        "GenOutputModuleName": "OutputA",
        "StepOneOutputModuleName": "OutputB",
        "GenConfigCacheID": self.injectGenerationConfig(),
        "StepOneConfigCacheID": self.injectStepOneConfig(),
        "StepTwoConfigCacheID": self.injectStepTwoConfig(),
        # add pile up information - for the generation task
        "PileupConfig": {
            "cosmics": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
            "minbias": ["/some/minbias/dataset3"],
        },
    })

    workload = relValMCWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Generation", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    # All sub-checks run against the single workload installed above;
    # each one deals with a specific task.
    self._generationTaskTest()
    self._stepOneTaskTest()
    self._stepTwoTaskTest()
def testCombinedReDigiRecoConfig(self):
    """
    _testCombinedReDigiRecoConfig_

    Build a ReDigi workload from configs injected with
    combinedStepOne=True (two steps, no step three), create its
    subscription through WMBSHelper and run the verifyDiscardRAW checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "redigi_t",
    })

    injectedConfigs = self.injectReDigiConfigs(combinedStepOne=True)
    workloadArgs.update({
        "StepOneConfigCacheID": injectedConfigs[0],
        "StepTwoConfigCacheID": injectedConfigs[2],
        "StepThreeConfigCacheID": None,
        "StepOneOutputModuleName": "RECODEBUGoutput",
    })

    workload = reDigiWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    # This variant uses the two-argument helper and the argument-less
    # createSubscription() call.
    helper = WMBSHelper(workload, "SomeBlock")
    helper.createSubscription()

    self.verifyDiscardRAW()
def testRelValMCWithPileup(self):
    """
    Build a Monte Carlo workload whose arguments include a pileup
    configuration ("mc" and "data" dataset lists), create its top level
    fileset and subscription, and run the shared Monte Carlo checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ConfigCacheID": self.injectMonteCarloConfig(),
        # add pile up configuration
        "PileupConfig": {
            "mc": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
            "data": ["/some/minbias/dataset3"],
        },
    })

    workload = monteCarloWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
def testRelValMCWithPileup(self):
    """
    Build a RelValMC workload that carries a pileup configuration,
    create its subscription through WMBSHelper and run the generation,
    step one and step two task checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "relvalmc_t",
        "GenOutputModuleName": "OutputA",
        "StepOneOutputModuleName": "OutputB",
        "GenConfigCacheID": self.injectGenerationConfig(),
        "StepOneConfigCacheID": self.injectStepOneConfig(),
        "StepTwoConfigCacheID": self.injectStepTwoConfig(),
        # add pile up information - for the generation task
        "PileupConfig": {
            "cosmics": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
            "minbias": ["/some/minbias/dataset3"],
        },
    })

    workload = relValMCWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    # This variant uses the two-argument helper and the argument-less
    # createSubscription() call.
    helper = WMBSHelper(workload, "SomeBlock")
    helper.createSubscription()

    # All sub-checks run against the single workload installed above;
    # each one deals with a specific task.
    self._generationTaskTest()
    self._stepOneTaskTest()
    self._stepTwoTaskTest()
def testRelValMC(self):
    """
    Build a RelValMC workload (no pileup), create its top level fileset
    and subscription, and run the generation, step two and step one task
    checks, in that order.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "relvalmc_t",
        "GenOutputModuleName": "OutputA",
        "StepOneOutputModuleName": "OutputB",
        "GenConfigCacheID": self.injectGenerationConfig(),
        "StepOneConfigCacheID": self.injectStepOneConfig(),
        "StepTwoConfigCacheID": self.injectStepTwoConfig(),
    })

    workload = relValMCWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Generation", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    # All sub-checks run against the single workload installed above;
    # each one deals with a specific task.
    self._generationTaskTest()
    self._stepTwoTaskTest()
    self._stepOneTaskTest()
def testRelValMCWithPileup(self):
    """
    Build a Monte Carlo workload whose arguments include a pileup
    configuration ("mc" and "data" dataset lists), create its top level
    fileset and subscription, and run the shared Monte Carlo checks.

    This variant stores the injected config under "ProcConfigCacheID".
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ProcConfigCacheID": self.injectMonteCarloConfig(),
        # add pile up configuration
        "PileupConfig": {
            "mc": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
            "data": ["/some/minbias/dataset3"],
        },
    })

    workload = monteCarloWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
def testMonteCarloExtension(self):
    """
    Create a Monte Carlo workflow and verify that it is injected correctly
    into WMBS and invoke its detailed test. This uses a non-zero first
    event and lumi. Check that the splitting arguments are correctly
    set for the lfn counter.

    The "initial_lfn_counter" splitting parameter is checked on the
    Production task and on the merge task of each output module
    (OutputA, OutputB).
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "rereco_t"
    defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()
    defaultArguments["FirstEvent"] = 3571428573
    defaultArguments["FirstLumi"] = 26042
    defaultArguments["TimePerEvent"] = 15
    defaultArguments["FilterEfficiency"] = 0.014
    defaultArguments["TotalTime"] = 28800

    # Same as the previous number of jobs + 1 which is the same value of
    # the first lumi
    initial_lfn_counter = 26042

    testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    self._commonMonteCarloTest()

    productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
    productionSplitting = productionTask.jobSplittingParameters()
    # assertIn reports the container contents when the key is missing,
    # unlike assertTrue on an "in" expression.
    self.assertIn("initial_lfn_counter", productionSplitting,
                  "No initial lfn counter was stored")
    self.assertEqual(productionSplitting["initial_lfn_counter"],
                     initial_lfn_counter, "Wrong initial LFN counter")

    for outputMod in ["OutputA", "OutputB"]:
        mergeTask = testWorkload.getTaskByPath(
            '/TestWorkload/Production/ProductionMerge%s' % outputMod)
        mergeSplitting = mergeTask.jobSplittingParameters()
        self.assertIn("initial_lfn_counter", mergeSplitting,
                      "No initial lfn counter was stored")
        self.assertEqual(mergeSplitting["initial_lfn_counter"],
                         initial_lfn_counter, "Wrong initial LFN counter")

    return
def testMonteCarlo(self):
    """
    Build a Monte Carlo workload, create its subscription through
    WMBSHelper and run the shared Monte Carlo checks.

    This variant stores the injected config under "ProdConfigCacheID".
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ProdConfigCacheID": self.injectMonteCarloConfig(),
    })

    workload = monteCarloWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    # This variant uses the two-argument helper and the argument-less
    # createSubscription() call.
    helper = WMBSHelper(workload, "SomeBlock")
    helper.createSubscription()

    self._commonMonteCarloTest()
def testMonteCarloExtension(self):
    """
    Create a Monte Carlo workflow and verify that it is injected correctly
    into WMBS and invoke its detailed test. This uses a non-zero first
    event and lumi. Check that the splitting arguments are correctly
    set for the lfn counter.

    The "initial_lfn_counter" splitting parameter is checked on the
    Production task and on the merge task of each output module
    (OutputA, OutputB). This variant stores the injected config under
    "ProcConfigCacheID".
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "rereco_t"
    defaultArguments["ProcConfigCacheID"] = self.injectMonteCarloConfig()
    defaultArguments["FirstEvent"] = 3571428573
    defaultArguments["FirstLumi"] = 26042
    defaultArguments["TimePerEvent"] = 15
    defaultArguments["FilterEfficiency"] = 0.014
    defaultArguments["TotalTime"] = 28800

    # Same as the previous number of jobs + 1 which is the same value of
    # the first lumi
    initial_lfn_counter = 26042

    testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    self._commonMonteCarloTest()

    productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
    productionSplitting = productionTask.jobSplittingParameters()
    # assertIn reports the container contents when the key is missing,
    # unlike assertTrue on an "in" expression.
    self.assertIn("initial_lfn_counter", productionSplitting,
                  "No initial lfn counter was stored")
    self.assertEqual(productionSplitting["initial_lfn_counter"],
                     initial_lfn_counter, "Wrong initial LFN counter")

    for outputMod in ["OutputA", "OutputB"]:
        mergeTask = testWorkload.getTaskByPath(
            '/TestWorkload/Production/ProductionMerge%s' % outputMod)
        mergeSplitting = mergeTask.jobSplittingParameters()
        self.assertIn("initial_lfn_counter", mergeSplitting,
                      "No initial lfn counter was stored")
        self.assertEqual(mergeSplitting["initial_lfn_counter"],
                         initial_lfn_counter, "Wrong initial LFN counter")

    return
def testLHEStepZero(self):
    """
    _testLHEStepZero_

    Build an LHE step zero workload, create its top level fileset and
    subscription, and check that it passes the shared Monte Carlo checks.
    """
    workloadArgs = getTestArguments()
    workloadArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ConfigCacheID": self.injectMonteCarloConfig(),
    })

    workload = lheStepZeroWorkload("TestWorkload", workloadArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
def testChainedReDigi(self):
    """
    _testChainedReDigi_

    Verify that a chained ReDigi workflow that discards RAW data can be
    created and installed into WMBS correctly. This will only verify the
    step one/step two information in WMBS as the step three information is
    the same as the dependent workflow.

    Also checks that both cmsRun steps of the top level task carry a
    non-empty MC pileup dataset.
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"

    configs = self.injectReDigiConfigs()
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    defaultArguments["KeepStepOneOutput"] = False

    testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    # Verify that pileup is configured for both of the cmsRun steps in the
    # top level task.
    topLevelTask = testWorkload.getTopLevelTask()[0]
    cmsRun1Helper = topLevelTask.getStepHelper("cmsRun1")
    cmsRun2Helper = topLevelTask.getStepHelper("cmsRun2")
    cmsRun1PileupConfig = cmsRun1Helper.getPileup()
    cmsRun2PileupConfig = cmsRun2Helper.getPileup()

    # The second argument of assertTrue is the failure message, not an
    # expected value, so the original calls only ever checked truthiness
    # while passing "/some/cosmics/dataset" as the message. The check is
    # kept as a truthiness check, now with a real message.
    # NOTE(review): the intent was presumably to compare mc.dataset against
    # "/some/cosmics/dataset"; confirm the value (and whether it is a str
    # or a list) from getTestArguments() before tightening to assertEqual.
    self.assertTrue(cmsRun1PileupConfig.mc.dataset,
                    "Error: No MC pileup dataset configured for cmsRun1.")
    self.assertTrue(cmsRun2PileupConfig.mc.dataset,
                    "Error: No MC pileup dataset configured for cmsRun2.")

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    self.verifyDiscardRAW()
    return
def testLHEStepZero(self):
    """
    _testLHEStepZero_

    Build an LHE step zero workload, create its top level fileset and
    subscription, and check that it passes the shared Monte Carlo checks.
    """
    lheArgs = getTestArguments()
    lheArgs.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ConfigCacheID": self.injectMonteCarloConfig(),
    })

    lheWorkload = lheStepZeroWorkload("TestWorkload", lheArgs)
    lheWorkload.setSpecUrl("somespec")
    lheWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    wmbsHelper = WMBSHelper(lheWorkload, "Production", "SomeBlock",
                            cachepath=self.testInit.testDir)
    wmbsHelper.createTopLevelFileset()
    wmbsHelper.createSubscription(wmbsHelper.topLevelTask,
                                  wmbsHelper.topLevelFileset)

    self._commonMonteCarloTest()
def testDataProcessing(self):
    """
    _testDataProcessing_

    Build a data processing workload with RECOoutput declared as a
    transient output module, create its top level fileset and
    subscription, then check the workflows, output filesets and
    subscriptions loaded back for the processing, merge, cleanup and
    log collect tasks. The transient module is expected to have no merge
    workflow.
    """
    args = getTestArguments()
    args['TransientOutputModules'] = ['RECOoutput']

    workload = dataProcessingWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "DataProcessing", "SomeBlock",
                        cachepath=self.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    # --- Processing workflow and its outputs ---
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing")
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    outputModules = ["RECOoutput", "ALCARECOoutput"]
    for modName in outputModules:
        outputEntry = procWorkflow.outputMap[modName][0]
        mergedFileset = outputEntry["merged_output_fileset"]
        unmergedFileset = procWorkflow.outputMap[modName][0]["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        # A transient module keeps its unmerged fileset as the "merged"
        # output; any other module points at the merge task's fileset.
        if modName in args["TransientOutputModules"]:
            expectedMerged = "/TestWorkload/DataProcessing/unmerged-%s" % modName
        else:
            expectedMerged = "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % modName
        self.assertEqual(mergedFileset.name, expectedMerged,
                         "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(unmergedFileset.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % modName,
                         "Error: Unmerged output fileset is wrong.")

    procLogArchive = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    procUnmergedLogArchive = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    procLogArchive.loadData()
    procUnmergedLogArchive.loadData()

    self.assertEqual(procLogArchive.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(procUnmergedLogArchive.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- Merge workflows (non-transient modules only) ---
    for modName in outputModules:
        if modName in args["TransientOutputModules"]:
            # No merge for this output module
            continue

        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % modName)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeFileset = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeFileset = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeFileset.loadData()
        unmergedMergeFileset.loadData()

        self.assertEqual(mergedMergeFileset.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % modName,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeFileset.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % modName,
                         "Error: Unmerged output fileset is wrong.")

        mergeLogArchive = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        mergeUnmergedLogArchive = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        mergeLogArchive.loadData()
        mergeUnmergedLogArchive.loadData()

        self.assertEqual(mergeLogArchive.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % modName,
                         "Error: LogArchive output fileset is wrong: %s" % mergeLogArchive.name)
        self.assertEqual(mergeUnmergedLogArchive.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % modName,
                         "Error: LogArchive output fileset is wrong.")

    # --- Processing subscription ---
    inputFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    inputFileset.loadData()

    procSub = Subscription(fileset=inputFileset, workflow=procWorkflow)
    procSub.loadData()

    self.assertEqual(procSub["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSub["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    # --- Merge subscriptions ---
    recoUnmerged = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    recoUnmerged.loadData()
    recoMergeWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    # No merge workflow should exist in WMBS
    self.assertRaises(IndexError, recoMergeWorkflow.load)

    alcaUnmerged = Fileset(name="/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
    alcaUnmerged.loadData()
    alcaMergeWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
    alcaMergeWorkflow.load()
    mergeSub = Subscription(fileset=alcaUnmerged, workflow=alcaMergeWorkflow)
    mergeSub.loadData()

    self.assertEqual(mergeSub["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSub["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # --- Cleanup subscriptions ---
    for modName in ["RECOoutput", "ALCARECOoutput"]:
        unmergedInput = Fileset(name="/TestWorkload/DataProcessing/unmerged-%s" % modName)
        unmergedInput.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % modName)
        cleanupWorkflow.load()
        cleanupSub = Subscription(fileset=unmergedInput, workflow=cleanupWorkflow)
        cleanupSub.loadData()

        self.assertEqual(cleanupSub["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSub["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # --- Log collect subscriptions ---
    logFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-logArchive")
    logFileset.loadData()
    logWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/DataProcessing/LogCollect")
    logWorkflow.load()

    logSub = Subscription(fileset=logFileset, workflow=logWorkflow)
    logSub.loadData()

    self.assertEqual(logSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    logFileset = Fileset(
        name="/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive")
    logFileset.loadData()
    logWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect")
    logWorkflow.load()

    logSub = Subscription(fileset=logFileset, workflow=logWorkflow)
    logSub.loadData()

    self.assertEqual(logSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Register two sites, create the top level fileset and subscription for
    the test spec's first task through WMBSHelper, then check the
    workflows, output filesets, site white/black list and subscriptions
    loaded back for the processing, merge, cleanup and skim tasks.
    """
    sites = ResourceControl()
    sites.insertSite(siteName='site1', seName='goodse.cern.ch',
                     ceName='site1', plugin="TestPlugin")
    sites.insertSite(siteName='site2', seName='goodse2.cern.ch',
                     ceName='site2', plugin="TestPlugin")

    workload = self.createTestWMSpec()
    firstTask = getFirstTask(workload)
    helper = WMBSHelper(workload, firstTask.name(), "SomeBlock",
                        cachepath=self.workDir)
    helper.createTopLevelFileset()
    helper.createSubscription(firstTask, helper.topLevelFileset)

    # --- Processing workflow ---
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask")
    procWorkflow.load()

    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco",
                     "Error: Wrong type.")
    expectedProcSpec = os.path.join(self.workDir, procWorkflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(procWorkflow.spec, expectedProcSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    procMerged = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
    procUnmerged = procWorkflow.outputMap["OutputA"][0]["output_fileset"]
    procMerged.loadData()
    procUnmerged.loadData()

    self.assertEqual(procMerged.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(procUnmerged.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # --- Merge workflow ---
    mergeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()

    self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    expectedMergeSpec = os.path.join(self.workDir, mergeWorkflow.name,
                                     "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(mergeWorkflow.spec, expectedMergeSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # --- Cleanup workflow ---
    cleanupWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()

    self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    expectedCleanupSpec = os.path.join(self.workDir, cleanupWorkflow.name,
                                       "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(cleanupWorkflow.spec, expectedCleanupSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    mergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    mergeOutput.loadData()
    self.assertEqual(mergeOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # --- Skim workflow ---
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()

    self.assertEqual(skimWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    expectedSkimSpec = os.path.join(self.workDir, skimWorkflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(skimWorkflow.spec, expectedSkimSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    skimMergedA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    skimUnmergedA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    skimMergedB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    skimUnmergedB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    skimMergedA.loadData()
    skimMergedB.loadData()
    skimUnmergedA.loadData()
    skimUnmergedB.loadData()

    # The expected merged and unmerged names are the same for each skim
    # output.
    self.assertEqual(skimMergedA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % skimMergedA.name)
    self.assertEqual(skimUnmergedA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(skimMergedB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(skimUnmergedB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # --- Subscriptions ---
    inputFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    inputFileset.loadData()

    procSub = Subscription(fileset=inputFileset, workflow=procWorkflow)
    procSub.loadData()

    self.assertEqual(len(procSub.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for siteEntry in procSub.getWhiteBlackList():
        # Only site1 is expected to be valid; any other entry must not be.
        if site_is_site1 := (siteEntry["site_name"] == "site1"):
            self.assertEqual(siteEntry["valid"], 1,
                             "Error: Site should be white listed.")
        else:
            self.assertEqual(siteEntry["valid"], 0,
                             "Error: Site should be black listed.")

    self.assertEqual(procSub["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSub["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    mergeSub = Subscription(fileset=procUnmerged, workflow=mergeWorkflow)
    mergeSub.loadData()

    self.assertEqual(len(mergeSub.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSub["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSub["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSub = Subscription(fileset=mergeOutput, workflow=skimWorkflow)
    skimSub.loadData()

    self.assertEqual(skimSub["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSub["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
def testPrivateMC(self):
    """
    _testPrivateMC_

    Build a PrivateMC workload, install it into WMBS through WMBSHelper
    and check the output filesets and subscriptions that were created.
    """
    arguments = getTestArguments()
    arguments["CouchURL"] = os.environ["COUCHURL"]
    arguments["CouchDBName"] = "privatemc_t"
    arguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    arguments["ProcessingVersion"] = 1

    workload = PrivateMCWorkloadFactory()("TestWorkload", arguments)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "PrivateMC", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/PrivateMC")
    procWorkflow.load()

    outputCount = len(procWorkflow.outputMap.keys())
    self.assertEqual(outputCount, 3,
                     "Error: Wrong number of WF outputs: %s" % outputCount)

    # The original author notes there is no merge task here, so the
    # "merged" and the unmerged fileset are expected to carry the same name.
    logArchiveInfo = procWorkflow.outputMap["logArchive"][0]
    logArchiveFilesets = (logArchiveInfo["merged_output_fileset"],
                          logArchiveInfo["output_fileset"])
    for logArchiveFileset in logArchiveFilesets:
        logArchiveFileset.loadData()
    for logArchiveFileset in logArchiveFilesets:
        self.assertEqual(logArchiveFileset.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    for outputModule in ("OutputA", "OutputB"):
        moduleInfo = procWorkflow.outputMap[outputModule][0]
        mergedFileset = moduleInfo["merged_output_fileset"]
        unmergedFileset = moduleInfo["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        expectedName = "/TestWorkload/PrivateMC/unmerged-%s" % outputModule
        self.assertEqual(mergedFileset.name, expectedName,
                         "Error: Merged output fileset is wrong: %s"
                         % mergedFileset.name)
        self.assertEqual(unmergedFileset.name, expectedName,
                         "Error: Unmerged output fileset is wrong.")

    # Top level (production) subscription.
    inputFileset = Fileset(name="TestWorkload-PrivateMC-SomeBlock")
    inputFileset.loadData()
    productionSub = Subscription(fileset=inputFileset, workflow=procWorkflow)
    productionSub.loadData()
    self.assertEqual(productionSub["type"], "PrivateMC",
                     "Error: Wrong subscription type.")
    self.assertEqual(productionSub["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription fed by the unmerged logArchive fileset.
    logArchiveFileset = Fileset(
        name="/TestWorkload/PrivateMC/unmerged-logArchive")
    logArchiveFileset.loadData()
    logCollectWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/PrivateMC/LogCollect")
    logCollectWorkflow.load()
    logCollectSub = Subscription(fileset=logArchiveFileset,
                                 workflow=logCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Install the test WMSpec into WMBS with WMBSHelper.createSubscription()
    and check the workflows, output filesets and subscriptions (including
    the site white/black list of the processing subscription).
    """
    def loadAndCheckWorkflow(taskPath, expectedOutputs):
        """Load a child workflow and check owner, spec path and output count."""
        workflow = Workflow(name="TestWorkload", task=taskPath)
        workflow.load()
        self.assertEqual(workflow.owner, "sfoulkes", "Error: Wrong owner.")
        expectedSpec = os.path.join(self.workDir, workflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
        self.assertEqual(workflow.spec, expectedSpec,
                         "Error: Wrong spec URL")
        self.assertEqual(len(workflow.outputMap.keys()), expectedOutputs,
                         "Error: Wrong number of WF outputs.")
        return workflow

    sites = ResourceControl()
    sites.insertSite(siteName='site1', seName='goodse.cern.ch',
                     ceName='site1', plugin="TestPlugin")
    sites.insertSite(siteName='site2', seName='goodse2.cern.ch',
                     ceName='site2', plugin="TestPlugin")

    workload = self.createTestWMSpec()
    helper = WMBSHelper(workload, "SomeBlock", cachepath=self.workDir)
    helper.createSubscription()

    # Processing workflow: checked inline because its failure messages
    # differ from those used for the child workflows.
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask")
    procWorkflow.load()
    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco",
                     "Error: Wrong type.")
    procSpec = os.path.join(self.workDir, procWorkflow.name,
                            "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(procWorkflow.spec, procSpec, "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    procOutputInfo = procWorkflow.outputMap["OutputA"][0]
    mergedProcOutput = procOutputInfo["merged_output_fileset"]
    unmergedProcOutput = procOutputInfo["output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()
    self.assertEqual(mergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    mergeWorkflow = loadAndCheckWorkflow(
        "/TestWorkload/ProcessingTask/MergeTask", 1)
    loadAndCheckWorkflow("/TestWorkload/ProcessingTask/CleanupTask", 0)

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = loadAndCheckWorkflow(
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask", 2)

    skimInfoA = skimWorkflow.outputMap["SkimOutputA"][0]
    mergedSkimA = skimInfoA["merged_output_fileset"]
    unmergedSkimA = skimInfoA["output_fileset"]
    skimInfoB = skimWorkflow.outputMap["SkimOutputB"][0]
    mergedSkimB = skimInfoB["merged_output_fileset"]
    unmergedSkimB = skimInfoB["output_fileset"]
    for skimFileset in (mergedSkimA, mergedSkimB,
                        unmergedSkimA, unmergedSkimB):
        skimFileset.loadData()

    skimNameA = "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA"
    skimNameB = "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB"
    self.assertEqual(mergedSkimA.name, skimNameA,
                     "Error: Merged output fileset is wrong: %s"
                     % mergedSkimA.name)
    self.assertEqual(unmergedSkimA.name, skimNameA,
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimB.name, skimNameB,
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimB.name, skimNameB,
                     "Error: Unmerged output fileset is wrong.")

    # Processing subscription: site1 is expected valid, any other site not.
    inputFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    inputFileset.loadData()
    procSubscription = Subscription(fileset=inputFileset,
                                    workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for siteEntry in procSubscription.getWhiteBlackList():
        if site_is_site1(siteEntry) if False else siteEntry["site_name"] != "site1":
            self.assertEqual(siteEntry["valid"], 0,
                             "Error: Site should be black listed.")
            continue
        self.assertEqual(siteEntry["valid"], 1,
                         "Error: Site should be white listed.")
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                     workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testAnalysis(self):
    """
    _testAnalysis_

    Build an Analysis workload, install it into WMBS through WMBSHelper
    and check the output filesets and subscriptions that were created.
    """
    arguments = getTestArguments()
    arguments["CouchUrl"] = os.environ["COUCHURL"]
    arguments["CouchDBName"] = "analysis_t"
    arguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    arguments["ProcessingVersion"] = 'v1'

    workload = AnalysisWorkloadFactory()("TestWorkload", arguments)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "Analysis", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/Analysis")
    procWorkflow.load()
    self.assertEqual(len(procWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # The original author notes Analysis has no merge task, so the
    # "merged" and the unmerged fileset are expected to carry the same name.
    logArchiveInfo = procWorkflow.outputMap["logArchive"][0]
    logArchiveFilesets = (logArchiveInfo["merged_output_fileset"],
                          logArchiveInfo["output_fileset"])
    for logArchiveFileset in logArchiveFilesets:
        logArchiveFileset.loadData()
    for logArchiveFileset in logArchiveFilesets:
        self.assertEqual(logArchiveFileset.name,
                         "/TestWorkload/Analysis/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    outputInfo = procWorkflow.outputMap["output"][0]
    outputFilesets = (outputInfo["output_fileset"],
                      outputInfo["merged_output_fileset"])
    for outputFileset in outputFilesets:
        outputFileset.loadData()
    for outputFileset in outputFilesets:
        self.assertEqual(outputFileset.name,
                         "/TestWorkload/Analysis/unmerged-output",
                         "Error: Unmerged output fileset is wrong: "
                         + outputFileset.name)

    # Top level (analysis) subscription.
    inputFileset = Fileset(name="TestWorkload-Analysis-SomeBlock")
    inputFileset.loadData()
    analysisSub = Subscription(fileset=inputFileset, workflow=procWorkflow)
    analysisSub.loadData()
    self.assertEqual(analysisSub["type"], "Analysis",
                     "Error: Wrong subscription type.")
    self.assertEqual(analysisSub["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription fed by the unmerged logArchive fileset.
    logArchiveFileset = Fileset(
        name="/TestWorkload/Analysis/unmerged-logArchive")
    logArchiveFileset.loadData()
    logCollectWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/Analysis/LogCollect")
    logCollectWorkflow.load()
    logCollectSub = Subscription(fileset=logArchiveFileset,
                                 workflow=logCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy): """
    _releasePromptReco_

    Called by Tier0Feeder.

    Finds all run/primds that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay > now AND run.end_time > 0 )
    through the RunConfig.FindRecoRelease DAO, then creates workflows and
    subscriptions for the processing of those runs/datasets.

    Outline of what the visible code does:

    1. For every run returned by FindRecoRelease (in sorted order) it reads
       the run info (RunConfig.GetRunInfo) and the PhEDEx configuration
       (RunConfig.GetPhEDExConfig).
    2. For every (dataset, fileset, repackProcVer) of the run it collects
       bind dictionaries for the dataset scenario, CMSSW version, reco
       configuration and release timestamp, and builds PhEDEx subscription
       dictionaries for the AOD, ALCARECO, DQM and RECO data tiers depending
       on the dataset configuration and the configured tape/disk nodes.
    3. When datasetConfig.DoReco is set and at least one tier is written, it
       assembles the PromptReco spec arguments and stores a
       (WMBSHelper, wmSpec, fileset) tuple in recoSpecs keyed by the
       workflow name "PromptReco_Run<run>_<dataset>".
    4. Inside one database transaction it executes the collected inserts,
       creates a WMBS subscription per stored spec, records workflow
       monitoring for each fileset and marks the workflows as injected.

    Parameters:
      tier0Config    -- Tier0 configuration; passed to retrieveDatasetConfig()
                        and read for Global.ScramArches and
                        Global.DefaultScramArch.
      specDirectory  -- passed to WMBSHelper as cachepath.
      dqmUploadProxy -- stored in the spec arguments as 'DQMUploadProxy'.

    Returns nothing. If anything inside the transaction raises, the
    exception is logged, the transaction is rolled back and a RuntimeError
    is raised instead; otherwise the transaction is committed.

    NOTE(review): in this copy of the file the statement that follows the
    specArguments['CouchURL'] assignment looks redacted/truncated (the text
    jumps from the URL string straight into what appears to be the argument
    list of an owner-details call, leaving an unbalanced parenthesis). The
    creation of wmSpec is therefore not visible here -- restore it from
    the upstream source before relying on this function.
    NOTE(review): in the visible code bindsStorageNode is never appended to,
    insertPhEDExConfigDAO is never executed and the subscriptions list is
    never consumed; they may be used in the missing part -- TODO confirm.
    """ logging.debug("releasePromptReco()") myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario") insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion") insertRecoConfigDAO = daoFactory(classname = "RunConfig.InsertRecoConfig") insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode") insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig") releasePromptRecoDAO = daoFactory(classname = "RunConfig.ReleasePromptReco") insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring") bindsDatasetScenario = [] bindsCMSSWVersion = [] bindsRecoConfig = [] bindsStorageNode = [] bindsReleasePromptReco = [] # mark workflows as injected wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows") # # for creating PromptReco specs # recoSpecs = {} # # for PhEDEx subscription settings # subscriptions = [] findRecoReleaseDAO = daoFactory(classname = "RunConfig.FindRecoRelease") recoRelease = findRecoReleaseDAO.execute(transaction = False) for run in sorted(recoRelease.keys()): # retrieve some basic run information getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo") runInfo = getRunInfoDAO.execute(run, transaction = False)[0] # retrieve phedex configs for run getPhEDExConfigDAO = daoFactory(classname = "RunConfig.GetPhEDExConfig") phedexConfigs = getPhEDExConfigDAO.execute(run, transaction = 
False) for (dataset, fileset, repackProcVer) in recoRelease[run]: bindsReleasePromptReco.append( { 'RUN' : run, 'PRIMDS' : dataset, 'NOW' : int(time.time()) } ) datasetConfig = retrieveDatasetConfig(tier0Config, dataset) bindsDatasetScenario.append( { 'RUN' : run, 'PRIMDS' : dataset, 'SCENARIO' : datasetConfig.Scenario } ) if datasetConfig.CMSSWVersion != None: bindsCMSSWVersion.append( { 'VERSION' : datasetConfig.CMSSWVersion } ) alcaSkim = None if len(datasetConfig.AlcaSkims) > 0: alcaSkim = ",".join(datasetConfig.AlcaSkims) dqmSeq = None if len(datasetConfig.DqmSequences) > 0: dqmSeq = ",".join(datasetConfig.DqmSequences) datasetConfig.ScramArch = tier0Config.Global.ScramArches.get(datasetConfig.CMSSWVersion, tier0Config.Global.DefaultScramArch) bindsRecoConfig.append( { 'RUN' : run, 'PRIMDS' : dataset, 'DO_RECO' : int(datasetConfig.DoReco), 'RECO_SPLIT' : datasetConfig.RecoSplit, 'WRITE_RECO' : int(datasetConfig.WriteRECO), 'WRITE_DQM' : int(datasetConfig.WriteDQM), 'WRITE_AOD' : int(datasetConfig.WriteAOD), 'PROC_VER' : datasetConfig.ProcessingVersion, 'ALCA_SKIM' : alcaSkim, 'DQM_SEQ' : dqmSeq, 'BLOCK_DELAY' : datasetConfig.BlockCloseDelay, 'CMSSW' : datasetConfig.CMSSWVersion, 'SCRAM_ARCH' : datasetConfig.ScramArch, 'MULTICORE' : datasetConfig.Multicore, 'GLOBAL_TAG' : datasetConfig.GlobalTag } ) phedexConfig = phedexConfigs[dataset] if datasetConfig.WriteAOD: custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if phedexConfig['tape_node'] != None: custodialSites.append(phedexConfig['tape_node']) if phedexConfig['disk_node'] != None: nonCustodialSites.append(phedexConfig['disk_node']) autoApproveSites.append(phedexConfig['disk_node']) subscriptions.append( { 'custodialSites' : custodialSites, 'custodialSubType' : "Replica", 'nonCustodialSites' : nonCustodialSites, 'autoApproveSites' : autoApproveSites, 'priority' : "high", 'primaryDataset' : dataset, 'dataTier' : "AOD" } ) if len(datasetConfig.AlcaSkims) > 0: if phedexConfig['tape_node'] != 
None: subscriptions.append( { 'custodialSites' : [phedexConfig['tape_node']], 'custodialSubType' : "Replica", 'nonCustodialSites' : [], 'autoApproveSites' : [], 'priority' : "high", 'primaryDataset' : dataset, 'dataTier' : "ALCARECO" } ) if datasetConfig.WriteDQM: if phedexConfig['tape_node'] != None: subscriptions.append( { 'custodialSites' : [phedexConfig['tape_node']], 'custodialSubType' : "Replica", 'nonCustodialSites' : [], 'autoApproveSites' : [], 'priority' : "high", 'primaryDataset' : dataset, 'dataTier' : "DQM" } ) if datasetConfig.WriteRECO: if phedexConfig['disk_node'] != None: subscriptions.append( { 'custodialSites' : [], 'nonCustodialSites' : [phedexConfig['disk_node']], 'autoApproveSites' : [phedexConfig['disk_node']], 'priority' : "high", 'primaryDataset' : dataset, 'dataTier' : "RECO" } ) writeTiers = [] if datasetConfig.WriteRECO: writeTiers.append("RECO") if datasetConfig.WriteAOD: writeTiers.append("AOD") if datasetConfig.WriteDQM: writeTiers.append("DQM") if len(datasetConfig.AlcaSkims) > 0: writeTiers.append("ALCARECO") if datasetConfig.DoReco and len(writeTiers) > 0: # # create WMSpec # taskName = "Reco" workflowName = "PromptReco_Run%d_%s" % (run, dataset) specArguments = {} specArguments['Group'] = "unknown" specArguments['Requestor'] = "unknown" specArguments['RequestorDN'] = "unknown" specArguments['TimePerEvent'] = 12 specArguments['SizePerEvent'] = 512 specArguments['Memory'] = 1800 if datasetConfig.Multicore: specArguments['Multicore'] = datasetConfig.Multicore specArguments['Memory'] = 1800 * datasetConfig.Multicore specArguments['RequestPriority'] = 0 specArguments['AcquisitionEra'] = runInfo['acq_era'] specArguments['CMSSWVersion'] = datasetConfig.CMSSWVersion specArguments['ScramArch'] = datasetConfig.ScramArch specArguments['RunNumber'] = run specArguments['SplittingAlgo'] = "EventBased" specArguments['EventsPerJob'] = datasetConfig.RecoSplit specArguments['ProcessingString'] = "PromptReco" specArguments['ProcessingVersion'] = 
datasetConfig.ProcessingVersion specArguments['Scenario'] = datasetConfig.Scenario specArguments['GlobalTag'] = datasetConfig.GlobalTag specArguments['GlobalTagConnect'] = datasetConfig.GlobalTagConnect specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (dataset, runInfo['acq_era'], repackProcVer) specArguments['WriteTiers'] = writeTiers specArguments['AlcaSkims'] = datasetConfig.AlcaSkims specArguments['DqmSequences'] = datasetConfig.DqmSequences specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type'] if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'], runInfo['bulk_data_type']) else: specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type'] specArguments['ValidStatus'] = "VALID" specArguments['EnableHarvesting'] = "True" specArguments['DQMUploadProxy'] = dqmUploadProxy specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl'] specArguments['BlockCloseDelay'] = datasetConfig.BlockCloseDelay specArguments['SiteWhitelist'] = datasetConfig.SiteWhitelist specArguments['SiteBlacklist'] = [] specArguments['TrustSitelists'] = "True" # not used, but needed by the validation specArguments['CouchURL'] = "http://*****:*****@cern.ch", "T0", { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT', 'dn' : "*****@*****.**" } ) wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760, softTimeout = 604800, gracePeriod = 3600) wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory) recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset) try: myThread.transaction.begin() if len(bindsDatasetScenario) > 0: insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True) if len(bindsCMSSWVersion) > 0: insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True) if len(bindsRecoConfig) > 0: insertRecoConfigDAO.execute(bindsRecoConfig, conn = myThread.transaction.conn, transaction = True) if 
len(bindsStorageNode) > 0: insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True) if len(bindsReleasePromptReco) > 0: releasePromptRecoDAO.execute(bindsReleasePromptReco, conn = myThread.transaction.conn, transaction = True) for (wmbsHelper, wmSpec, fileset) in recoSpecs.values(): wmbsHelper.createSubscription(wmSpec.getTask(taskName), Fileset(id = fileset), alternativeFilesetClose = True) insertWorkflowMonitoringDAO.execute([fileset], conn = myThread.transaction.conn, transaction = True) if len(recoSpecs) > 0: markWorkflowsInjectedDAO.execute(recoSpecs.keys(), injected = True, conn = myThread.transaction.conn, transaction = True) except Exception as ex: logging.exception(ex) myThread.transaction.rollback() raise RuntimeError("Problem in releasePromptReco() database transaction !") else: myThread.transaction.commit() return
def testTier1PromptReco(self):
    """
    _testTier1PromptReco_

    Create a T1 Prompt Reconstruction workflow and verify it installs into
    WMBS correctly: the Reco and AlcaSkim workflows and their outputs, the
    per-tier and per-stream merge workflows, and the processing, merge,
    cleanup and LogCollect subscriptions.
    """
    def loadOutputFilesets(workflow, outputModule):
        """Return the loaded (merged, unmerged) filesets of an output module."""
        outputInfo = workflow.outputMap[outputModule][0]
        merged = outputInfo["merged_output_fileset"]
        unmerged = outputInfo["output_fileset"]
        merged.loadData()
        unmerged.loadData()
        return merged, unmerged

    def loadSubscription(filesetName, taskPath):
        """Load and return the subscription of taskPath's workflow on filesetName."""
        fileset = Fileset(name=filesetName)
        fileset.loadData()
        workflow = Workflow(name="TestWorkload", task=taskPath)
        workflow.load()
        subscription = Subscription(fileset=fileset, workflow=workflow)
        subscription.loadData()
        return subscription

    testArguments = getTestArguments()

    testWorkload = tier1promptrecoWorkload("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    # Output modules of the Reco task that get their own merge task.
    mergedTiers = ["write_RECO", "write_AOD", "write_DQM"]

    #
    # Reco workflow outputs (data tiers plus logArchive)
    #
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()),
                     len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    for goldenOutputMod in ["write_RECO", "write_ALCARECO",
                            "write_AOD", "write_DQM"]:
        mergedOutput, unmergedOutput = loadOutputFilesets(recoWorkflow,
                                                          goldenOutputMod)
        # write_ALCARECO is consumed by the AlcaSkim task and not merged
        # directly, so only the other tiers are checked for a merge fileset.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput, unmergedLogArchOutput = loadOutputFilesets(recoWorkflow,
                                                              "logArchive")
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    #
    # AlcaSkim workflow outputs (one per ALCARECO stream plus logArchive)
    #
    alcaSkimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    # Built once and reused by every AlcaSkim-related section below.
    alcaStreamMods = ["ALCARECOStream%s" % alcaProd
                      for alcaProd in testArguments["AlcaSkims"]]

    for goldenOutputMod in alcaStreamMods:
        mergedOutput, unmergedOutput = loadOutputFilesets(alcaSkimWorkflow,
                                                          goldenOutputMod)
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput, unmergedLogArchOutput = loadOutputFilesets(alcaSkimWorkflow,
                                                              "logArchive")
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    #
    # Merge workflows of the Reco data tiers
    #
    for goldenOutputMod in mergedTiers:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput, unmergedMergeOutput = loadOutputFilesets(mergeWorkflow,
                                                                    "Merged")
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput, unmergedLogArchOutput = loadOutputFilesets(mergeWorkflow,
                                                                  "logArchive")
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    #
    # Merge workflows of the AlcaSkim streams
    #
    for goldenOutputMod in alcaStreamMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

        mergedMergeOutput, unmergedMergeOutput = loadOutputFilesets(mergeWorkflow,
                                                                    "Merged")
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput, unmergedLogArchOutput = loadOutputFilesets(mergeWorkflow,
                                                                  "logArchive")
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    #
    # Processing subscriptions (Reco on the input block, AlcaSkim on ALCARECO)
    #
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                        workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # Kept from the original test: the fileset is loaded but not otherwise
    # used (presumably an existence check -- TODO confirm).
    mergedRecoFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
    mergedRecoFileset.loadData()

    #
    # Merge subscriptions
    #
    for unmergedOutput in mergedTiers:
        mergeSubscription = loadSubscription(
            "/TestWorkload/Reco/unmerged-%s" % unmergedOutput,
            "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    for unmergedOutput in alcaStreamMods:
        mergeSubscription = loadSubscription(
            "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    #
    # Cleanup subscriptions
    #
    for goldenOutputMod in ["write_RECO", "write_AOD", "write_DQM",
                            "write_ALCARECO"]:
        cleanupSubscription = loadSubscription(
            "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
            "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        # Fixed: this assertion used to report "Wrong subscription type."
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    for goldenOutputMod in alcaStreamMods:
        cleanupSubscription = loadSubscription(
            "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    #
    # LogCollect subscriptions
    #
    logCollectSub = loadSubscription("/TestWorkload/Reco/unmerged-logArchive",
                                     "/TestWorkload/Reco/LogCollect")
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    logCollectSub = loadSubscription("/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                                     "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # Fixed: the two loops below used to assert on the stale logCollectSub
    # from the AlcaSkim section instead of the subscription loaded in the
    # loop, so the merge LogCollect subscriptions were never verified.
    for goldenOutputMod in mergedTiers:
        logCollectSubscription = loadSubscription(
            "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
            "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    for goldenOutputMod in alcaStreamMods:
        logCollectSubscription = loadSubscription(
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    return
factory = TaskChainWorkloadFactory() try: self.workload = factory("YankingTheChain", arguments) except Exception, ex: msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex) self.fail(msg) self.workload.setSpecUrl("somespec") self.workload.setOwnerDetails("*****@*****.**", "DMWM") testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self._checkTask( self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1']) self._checkTask( self.workload.getTaskByPath( "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"), arguments['Task2']) self._checkTask( self.workload.getTaskByPath( "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco" ), arguments['Task3']) self._checkTask( self.workload.getTaskByPath( "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"
def testDataProcessing(self):
    """
    _testDataProcessing_

    Build a data processing workload, install it into WMBS and check the
    resulting workflows, output filesets and subscriptions.
    """
    workload = dataProcessingWorkload("TestWorkload", getTestArguments())
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    wmbsHelper = WMBSHelper(workload, "SomeBlock")
    wmbsHelper.createSubscription()

    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing")
    procWorkflow.load()

    outputCount = len(procWorkflow.outputMap.keys())
    self.assertEqual(outputCount, 3, "Error: Wrong number of WF outputs.")

    # Each data output module has a merged and an unmerged destination.
    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for modName in goldenOutputMods:
        destination = procWorkflow.outputMap[modName][0]
        mergedFileset = destination["merged_output_fileset"]
        unmergedFileset = destination["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        self.assertEqual(
            mergedFileset.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % modName,
            "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(
            unmergedFileset.name,
            "/TestWorkload/DataProcessing/unmerged-%s" % modName,
            "Error: Unmerged output fileset is wrong.")

    # Both logArchive entries are expected to name the unmerged fileset.
    logDestination = procWorkflow.outputMap["logArchive"][0]
    mergedLogArchive = logDestination["merged_output_fileset"]
    unmergedLogArchive = logDestination["output_fileset"]
    mergedLogArchive.loadData()
    unmergedLogArchive.loadData()
    for logFileset in (mergedLogArchive, unmergedLogArchive):
        self.assertEqual(logFileset.name,
                         "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    # Merge tasks: "Merged" and "logArchive" each point at a single
    # fileset for both the merged and the unmerged slot.
    for modName in goldenOutputMods:
        mergeTaskPath = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % modName
        mergeWorkflow = Workflow(name="TestWorkload", task=mergeTaskPath)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedDestination = mergeWorkflow.outputMap["Merged"][0]
        mergedSlot = mergedDestination["merged_output_fileset"]
        unmergedSlot = mergedDestination["output_fileset"]
        mergedSlot.loadData()
        unmergedSlot.loadData()

        self.assertEqual(
            mergedSlot.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % modName,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSlot.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % modName,
            "Error: Unmerged output fileset is wrong.")

        mergeLogDestination = mergeWorkflow.outputMap["logArchive"][0]
        mergedLogSlot = mergeLogDestination["merged_output_fileset"]
        unmergedLogSlot = mergeLogDestination["output_fileset"]
        mergedLogSlot.loadData()
        unmergedLogSlot.loadData()

        self.assertEqual(
            mergedLogSlot.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % modName,
            "Error: LogArchive output fileset is wrong: %s" % mergedLogSlot.name)
        self.assertEqual(
            unmergedLogSlot.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % modName,
            "Error: LogArchive output fileset is wrong.")

    # Top level processing subscription.
    inputFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    inputFileset.loadData()

    procSubscription = Subscription(fileset=inputFileset,
                                    workflow=procWorkflow)
    procSubscription.loadData()

    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions: (unmerged input fileset, merge task), RECO first.
    mergeInputs = [
        ("/TestWorkload/DataProcessing/unmerged-outputRECORECO",
         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO"),
        ("/TestWorkload/DataProcessing/unmerged-outputALCARECOALCARECO",
         "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO"),
    ]
    for filesetName, taskPath in mergeInputs:
        unmergedInput = Fileset(name=filesetName)
        unmergedInput.loadData()
        mergeTaskWorkflow = Workflow(name="TestWorkload", task=taskPath)
        mergeTaskWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedInput,
                                         workflow=mergeTaskWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions on the unmerged data filesets.
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmergedInput = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
        unmergedInput.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedInput,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions: (logArchive fileset, LogCollect task) for
    # the processing task and then each merge task.
    logCollectInputs = [
        ("/TestWorkload/DataProcessing/unmerged-logArchive",
         "/TestWorkload/DataProcessing/LogCollect"),
        ("/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-logArchive",
         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/DataProcessingoutputRECORECOMergeLogCollect"),
        ("/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/merged-logArchive",
         "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/DataProcessingoutputALCARECOALCARECOMergeLogCollect"),
    ]
    for filesetName, taskPath in logCollectInputs:
        logArchiveFileset = Fileset(name=filesetName)
        logArchiveFileset.loadData()
        logCollectWorkflow = Workflow(name="TestWorkload", task=taskPath)
        logCollectWorkflow.load()
        logCollectSub = Subscription(fileset=logArchiveFileset,
                                     workflow=logCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")

    return
def testDependentReDigi(self):
    """
    _testDependentReDigi_

    Verify that a dependent ReDigi workflow that keeps (stages out) its RAW
    data is created and installed into WMBS correctly.

    All three step configs are supplied.  For every processing, merge,
    cleanup and LogCollect task the test checks the workflow's output map
    against the expected filesets and checks the type of the subscription
    between the task and its input fileset.
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = self.injectReDigiConfigs()
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]

    testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath = self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    # ------------------------------------------------------------------
    # Local helpers: each wraps one pattern the checks below repeat.
    # ------------------------------------------------------------------
    def loadFileset(name):
        # Load an existing fileset by name.
        fileset = Fileset(name = name)
        fileset.loadData()
        return fileset

    def loadWorkflow(task):
        # Load the workflow installed for the given task path.
        workflow = Workflow(spec = "somespec", name = "TestWorkload",
                            task = task)
        workflow.load()
        return workflow

    def checkHasOutputs(workflow, outputMods, msg):
        # Every named output module must be present in the output map.
        for outputMod in outputMods:
            self.assertTrue(outputMod in workflow.outputMap.keys(), msg)

    def checkOutputFilesets(workflow, outputMod, mergedFileset,
                            unmergedFileset, msg):
        # Compare the first destination of an output module by fileset id.
        destination = workflow.outputMap[outputMod][0]
        self.assertEqual(destination["merged_output_fileset"].id,
                         mergedFileset.id, msg)
        self.assertEqual(destination["output_fileset"].id,
                         unmergedFileset.id, msg)

    def checkSingleDestinations(workflow):
        # assertEqual (not assertTrue) so a failure reports the real count.
        for outputMod in workflow.outputMap.keys():
            self.assertEqual(len(workflow.outputMap[outputMod]), 1,
                             "Error: more than one destination for output mod.")

    def checkNoOutput(workflow, msg):
        # Cleanup and LogCollect workflows must have an empty output map.
        self.assertEqual(len(workflow.outputMap.keys()), 0, msg)

    def checkSubscriptionType(workflow, fileset, subType, msg):
        # Load the subscription joining workflow and fileset; check its type.
        subscription = Subscription(workflow = workflow, fileset = fileset)
        subscription.loadData()
        self.assertEqual(subscription["type"], subType, msg)

    # ------------------------------------------------------------------
    # Task paths, built up level by level.
    # ------------------------------------------------------------------
    stepOnePath = "/TestWorkload/StepOneProc"
    stepOneMergePath = stepOnePath + "/StepOneProcMergeRAWDEBUGoutput"
    stepTwoPath = stepOneMergePath + "/StepTwoProc"
    stepTwoMergeDQMPath = stepTwoPath + "/StepTwoProcMergeDQMoutput"
    stepTwoMergeRECOPath = stepTwoPath + "/StepTwoProcMergeRECODEBUGoutput"
    stepThreePath = stepTwoMergeRECOPath + "/StepThreeProc"
    stepThreeMergePath = stepThreePath + "/StepThreeProcMergeaodOutputModule"

    logArchiveMsg = "Error: logArchive fileset is wrong."

    # ------------------------------------------------------------------
    # Filesets.
    # ------------------------------------------------------------------
    topLevelFileset = loadFileset("TestWorkload-StepOneProc-SomeBlock")

    stepOneUnmergedRAWFileset = loadFileset(stepOnePath + "/unmerged-RAWDEBUGoutput")
    stepOneMergedRAWFileset = loadFileset(stepOneMergePath + "/merged-Merged")
    stepOneLogArchiveFileset = loadFileset(stepOnePath + "/unmerged-logArchive")
    stepOneMergeLogArchiveFileset = loadFileset(stepOneMergePath + "/merged-logArchive")

    stepTwoUnmergedDQMFileset = loadFileset(stepTwoPath + "/unmerged-DQMoutput")
    stepTwoUnmergedRECOFileset = loadFileset(stepTwoPath + "/unmerged-RECODEBUGoutput")
    stepTwoMergedDQMFileset = loadFileset(stepTwoMergeDQMPath + "/merged-Merged")
    stepTwoMergedRECOFileset = loadFileset(stepTwoMergeRECOPath + "/merged-Merged")
    stepTwoLogArchiveFileset = loadFileset(stepTwoPath + "/unmerged-logArchive")
    stepTwoMergeDQMLogArchiveFileset = loadFileset(stepTwoMergeDQMPath + "/merged-logArchive")
    stepTwoMergeRECOLogArchiveFileset = loadFileset(stepTwoMergeRECOPath + "/merged-logArchive")

    stepThreeUnmergedAODFileset = loadFileset(stepThreePath + "/unmerged-aodOutputModule")
    stepThreeMergedAODFileset = loadFileset(stepThreeMergePath + "/merged-Merged")
    stepThreeLogArchiveFileset = loadFileset(stepThreePath + "/unmerged-logArchive")
    stepThreeMergeLogArchiveFileset = loadFileset(stepThreeMergePath + "/merged-logArchive")

    # ------------------------------------------------------------------
    # Step one.
    # ------------------------------------------------------------------
    stepOneSubMsg = "Error: Step one sub has wrong type."

    stepOneWorkflow = loadWorkflow(stepOnePath)
    self.assertEqual(stepOneWorkflow.wfType, 'redigi')
    checkHasOutputs(stepOneWorkflow, ["logArchive", "RAWDEBUGoutput"],
                    "Error: Step one missing output module.")
    checkOutputFilesets(stepOneWorkflow, "logArchive",
                        stepOneLogArchiveFileset, stepOneLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepOneWorkflow, "RAWDEBUGoutput",
                        stepOneMergedRAWFileset, stepOneUnmergedRAWFileset,
                        "Error: RAWDEBUG output fileset is wrong.")
    checkSingleDestinations(stepOneWorkflow)
    checkSubscriptionType(stepOneWorkflow, topLevelFileset,
                          "Processing", stepOneSubMsg)

    stepOneCleanupWorkflow = loadWorkflow(
        stepOnePath + "/StepOneProcCleanupUnmergedRAWDEBUGoutput")
    checkNoOutput(stepOneCleanupWorkflow,
                  "Error: Cleanup should have no output.")
    checkSubscriptionType(stepOneCleanupWorkflow, stepOneUnmergedRAWFileset,
                          "Cleanup", stepOneSubMsg)

    stepOneLogCollectWorkflow = loadWorkflow(stepOnePath + "/LogCollect")
    checkNoOutput(stepOneLogCollectWorkflow,
                  "Error: LogCollect should have no output.")
    checkSubscriptionType(stepOneLogCollectWorkflow, stepOneLogArchiveFileset,
                          "LogCollect", stepOneSubMsg)

    stepOneMergeWorkflow = loadWorkflow(stepOneMergePath)
    checkHasOutputs(stepOneMergeWorkflow, ["Merged", "logArchive"],
                    "Error: Step one merge missing output module.")
    checkOutputFilesets(stepOneMergeWorkflow, "logArchive",
                        stepOneMergeLogArchiveFileset,
                        stepOneMergeLogArchiveFileset, logArchiveMsg)
    checkOutputFilesets(stepOneMergeWorkflow, "Merged",
                        stepOneMergedRAWFileset, stepOneMergedRAWFileset,
                        "Error: RAWDEBUG merge output fileset is wrong.")
    checkSingleDestinations(stepOneMergeWorkflow)
    checkSubscriptionType(stepOneMergeWorkflow, stepOneUnmergedRAWFileset,
                          "Merge", stepOneSubMsg)

    # ------------------------------------------------------------------
    # Step two: fed from the merged RAW output of step one.
    # ------------------------------------------------------------------
    stepTwoSubMsg = "Error: Step two sub has wrong type."

    stepTwoWorkflow = loadWorkflow(stepTwoPath)
    # logArchive is checked explicitly here as for every other step, so
    # a missing module fails with a message instead of a bare KeyError.
    checkHasOutputs(stepTwoWorkflow,
                    ["RECODEBUGoutput", "DQMoutput", "logArchive"],
                    "Error: Step two missing output module.")
    checkOutputFilesets(stepTwoWorkflow, "logArchive",
                        stepTwoLogArchiveFileset, stepTwoLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepTwoWorkflow, "RECODEBUGoutput",
                        stepTwoMergedRECOFileset, stepTwoUnmergedRECOFileset,
                        "Error: RECODEBUG output fileset is wrong.")
    checkOutputFilesets(stepTwoWorkflow, "DQMoutput",
                        stepTwoMergedDQMFileset, stepTwoUnmergedDQMFileset,
                        "Error: DQM output fileset is wrong.")
    checkSubscriptionType(stepTwoWorkflow, stepOneMergedRAWFileset,
                          "Processing", stepTwoSubMsg)
    checkSingleDestinations(stepTwoWorkflow)

    stepTwoCleanupDQMWorkflow = loadWorkflow(
        stepTwoPath + "/StepTwoProcCleanupUnmergedDQMoutput")
    checkNoOutput(stepTwoCleanupDQMWorkflow,
                  "Error: Cleanup shouldn't have any output.")
    checkSubscriptionType(stepTwoCleanupDQMWorkflow,
                          stepTwoUnmergedDQMFileset,
                          "Cleanup", stepTwoSubMsg)

    stepTwoCleanupRECOWorkflow = loadWorkflow(
        stepTwoPath + "/StepTwoProcCleanupUnmergedRECODEBUGoutput")
    checkNoOutput(stepTwoCleanupRECOWorkflow,
                  "Error: Cleanup shouldn't have any output.")
    checkSubscriptionType(stepTwoCleanupRECOWorkflow,
                          stepTwoUnmergedRECOFileset,
                          "Cleanup", stepTwoSubMsg)

    stepTwoLogCollectWorkflow = loadWorkflow(
        stepTwoPath + "/StepTwoProcLogCollect")
    checkNoOutput(stepTwoLogCollectWorkflow,
                  "Error: LogCollect shouldn't have any output.")
    checkSubscriptionType(stepTwoLogCollectWorkflow,
                          stepTwoLogArchiveFileset,
                          "LogCollect", stepTwoSubMsg)

    stepTwoMergeRECOWorkflow = loadWorkflow(stepTwoMergeRECOPath)
    checkHasOutputs(stepTwoMergeRECOWorkflow, ["Merged", "logArchive"],
                    "Error: Step two merge missing output module.")
    checkOutputFilesets(stepTwoMergeRECOWorkflow, "logArchive",
                        stepTwoMergeRECOLogArchiveFileset,
                        stepTwoMergeRECOLogArchiveFileset, logArchiveMsg)
    checkOutputFilesets(stepTwoMergeRECOWorkflow, "Merged",
                        stepTwoMergedRECOFileset, stepTwoMergedRECOFileset,
                        "Error: RECODEBUG merge output fileset is wrong.")
    checkSubscriptionType(stepTwoMergeRECOWorkflow,
                          stepTwoUnmergedRECOFileset,
                          "Merge", stepTwoSubMsg)
    checkSingleDestinations(stepTwoMergeRECOWorkflow)

    stepTwoMergeDQMWorkflow = loadWorkflow(stepTwoMergeDQMPath)
    checkHasOutputs(stepTwoMergeDQMWorkflow, ["Merged", "logArchive"],
                    "Error: Step two merge missing output module.")
    checkOutputFilesets(stepTwoMergeDQMWorkflow, "logArchive",
                        stepTwoMergeDQMLogArchiveFileset,
                        stepTwoMergeDQMLogArchiveFileset, logArchiveMsg)
    checkOutputFilesets(stepTwoMergeDQMWorkflow, "Merged",
                        stepTwoMergedDQMFileset, stepTwoMergedDQMFileset,
                        "Error: DQM merge output fileset is wrong.")
    checkSubscriptionType(stepTwoMergeDQMWorkflow,
                          stepTwoUnmergedDQMFileset,
                          "Merge", stepTwoSubMsg)
    checkSingleDestinations(stepTwoMergeDQMWorkflow)

    # ------------------------------------------------------------------
    # Step three: fed from the merged RECO output of step two.
    # ------------------------------------------------------------------
    stepThreeSubMsg = "Error: Step three sub has wrong type."

    stepThreeWorkflow = loadWorkflow(stepThreePath)
    checkHasOutputs(stepThreeWorkflow, ["aodOutputModule", "logArchive"],
                    "Error: Step three missing output module.")
    checkOutputFilesets(stepThreeWorkflow, "logArchive",
                        stepThreeLogArchiveFileset,
                        stepThreeLogArchiveFileset, logArchiveMsg)
    # The message names AOD: these assertions cover aodOutputModule, not
    # the RECODEBUG module of step two.
    checkOutputFilesets(stepThreeWorkflow, "aodOutputModule",
                        stepThreeMergedAODFileset,
                        stepThreeUnmergedAODFileset,
                        "Error: AOD output fileset is wrong.")
    checkSubscriptionType(stepThreeWorkflow, stepTwoMergedRECOFileset,
                          "Processing", stepThreeSubMsg)
    checkSingleDestinations(stepThreeWorkflow)

    stepThreeCleanupWorkflow = loadWorkflow(
        stepThreePath + "/StepThreeProcCleanupUnmergedaodOutputModule")
    checkNoOutput(stepThreeCleanupWorkflow,
                  "Error: Cleanup should have no output.")
    checkSubscriptionType(stepThreeCleanupWorkflow,
                          stepThreeUnmergedAODFileset,
                          "Cleanup", stepThreeSubMsg)

    stepThreeLogCollectWorkflow = loadWorkflow(
        stepThreePath + "/StepThreeProcLogCollect")
    checkNoOutput(stepThreeLogCollectWorkflow,
                  "Error: LogCollect should have no output.")
    checkSubscriptionType(stepThreeLogCollectWorkflow,
                          stepThreeLogArchiveFileset,
                          "LogCollect", stepThreeSubMsg)

    stepThreeMergeWorkflow = loadWorkflow(stepThreeMergePath)
    checkHasOutputs(stepThreeMergeWorkflow, ["Merged", "logArchive"],
                    "Error: Step three merge missing output module.")
    checkOutputFilesets(stepThreeMergeWorkflow, "logArchive",
                        stepThreeMergeLogArchiveFileset,
                        stepThreeMergeLogArchiveFileset, logArchiveMsg)
    checkOutputFilesets(stepThreeMergeWorkflow, "Merged",
                        stepThreeMergedAODFileset, stepThreeMergedAODFileset,
                        "Error: AOD merge output fileset is wrong.")
    checkSubscriptionType(stepThreeMergeWorkflow,
                          stepThreeUnmergedAODFileset,
                          "Merge", stepThreeSubMsg)
    checkSingleDestinations(stepThreeMergeWorkflow)

    return
} factory = TaskChainWorkloadFactory() try: self.workload = factory("YankingTheChain", arguments) except Exception, ex: msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex) self.fail(msg) self.workload.setSpecUrl("somespec") self.workload.setOwnerDetails("*****@*****.**", "DMWM") testWMBSHelper = WMBSHelper(self.workload, "SomeBlock") testWMBSHelper.createSubscription() self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1']) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco"), arguments['Task2']) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco/ALCAReco"), arguments['Task3']) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco/Skims"), arguments['Task4']) digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT") digiStep = digi.getStepHelper("cmsRun1") self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag']) reco = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco") recoStep = reco.getStepHelper("cmsRun1") self.assertEqual(recoStep.getGlobalTag(), arguments['Task2']['GlobalTag'])
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Build a ReReco workload (a DataProcessing workload with a skim tacked
    on) in which RECOoutput is listed as a transient output module, install
    it into WMBS and check the skim workflows, filesets and subscriptions.
    The skim subscription is looked up on the unmerged RECO fileset.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()

    skimSpec = {
        "SkimName": "SomeSkim",
        "SkimInput": "RECOoutput",
        "SkimSplitAlgo": "FileBased",
        "SkimSplitArgs": {"files_per_job": 1, "include_parents": True},
        "ConfigCacheID": skimConfig,
        "Scenario": None,
    }

    dataProcArguments = getTestArguments()
    overrides = [
        ('ProcessingString', 'ProcString'),
        ('ConfigCacheID', recoConfig),
        ("SkimConfigs", [skimSpec]),
        ("CouchURL", os.environ["COUCHURL"]),
        ("CouchDBName", "rereco_t"),
        ("TransientOutputModules", ["RECOoutput"]),
    ]
    for argName, argValue in overrides:
        dataProcArguments[argName] = argValue

    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    # Walk the task tree down to the SkimB merge step's "Merged" module.
    skimTask = testWorkload.data.tasks.DataProcessing.tree.children.SomeSkim
    skimBMergeTask = skimTask.tree.children.SomeSkimMergeSkimB
    mergedModule = skimBMergeTask.steps.cmsRun1.output.modules.Merged
    self.assertEqual(
        mergedModule.mergedLFNBase,
        '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

    wmbsHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                            cachepath=self.testDir)
    wmbsHelper.createTopLevelFileset()
    wmbsHelper.createSubscription(wmbsHelper.topLevelTask,
                                  wmbsHelper.topLevelFileset)

    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()

    skimOutputCount = len(skimWorkflow.outputMap.keys())
    self.assertEqual(skimOutputCount, 3,
                     "Error: Wrong number of WF outputs.")

    # Skim output modules: merged and unmerged destinations.
    goldenOutputMods = ["SkimA", "SkimB"]
    for modName in goldenOutputMods:
        destination = skimWorkflow.outputMap[modName][0]
        mergedFileset = destination["merged_output_fileset"]
        unmergedFileset = destination["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        self.assertEqual(
            mergedFileset.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % modName,
            "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(
            unmergedFileset.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % modName,
            "Error: Unmerged output fileset is wrong: %s" % unmergedFileset.name)

    # Both logArchive entries are expected to name the unmerged fileset.
    logDestination = skimWorkflow.outputMap["logArchive"][0]
    mergedLogArchive = logDestination["merged_output_fileset"]
    unmergedLogArchive = logDestination["output_fileset"]
    mergedLogArchive.loadData()
    unmergedLogArchive.loadData()
    for logFileset in (mergedLogArchive, unmergedLogArchive):
        self.assertEqual(
            logFileset.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

    # Skim merge tasks.
    for modName in goldenOutputMods:
        mergeTaskPath = "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % modName
        mergeWorkflow = Workflow(name="TestWorkload", task=mergeTaskPath)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedDestination = mergeWorkflow.outputMap["Merged"][0]
        mergedSlot = mergedDestination["merged_output_fileset"]
        unmergedSlot = mergedDestination["output_fileset"]
        mergedSlot.loadData()
        unmergedSlot.loadData()

        self.assertEqual(
            mergedSlot.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % modName,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSlot.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % modName,
            "Error: Unmerged output fileset is wrong.")

        mergeLogDestination = mergeWorkflow.outputMap["logArchive"][0]
        mergedLogSlot = mergeLogDestination["merged_output_fileset"]
        unmergedLogSlot = mergeLogDestination["output_fileset"]
        mergedLogSlot.loadData()
        unmergedLogSlot.loadData()

        self.assertEqual(
            mergedLogSlot.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % modName,
            "Error: LogArchive output fileset is wrong: %s" % mergedLogSlot.name)
        self.assertEqual(
            unmergedLogSlot.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % modName,
            "Error: LogArchive output fileset is wrong.")

    # The skim subscription is looked up on the unmerged RECO fileset.
    skimInputFileset = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    skimInputFileset.loadData()

    skimSubscription = Subscription(fileset=skimInputFileset,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    skimLetters = ["A", "B"]

    # Merge subscriptions on the unmerged skim filesets.
    for letter in skimLetters:
        unmergedSkim = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % letter)
        unmergedSkim.loadData()
        skimMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" % letter)
        skimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedSkim,
                                         workflow=skimMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions on the same unmerged skim filesets.
    for letter in skimLetters:
        unmergedSkim = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % letter)
        unmergedSkim.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % letter)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedSkim,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions on the merge tasks' logArchive filesets.
    for letter in skimLetters:
        mergeLogArchive = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % letter)
        mergeLogArchive.loadData()
        mergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (letter, letter))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogArchive,
                                     workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def testDataProcessing(self):
    """
    _testDataProcessing_

    Build a data processing workload in which RECOoutput is declared a
    transient output module, install it into WMBS and verify the resulting
    workflows, output filesets and subscriptions.  The transient module
    must not get a merge workflow.
    """
    arguments = getTestArguments()
    arguments['TransientOutputModules'] = ['RECOoutput']
    workload = dataProcessingWorkload("TestWorkload", arguments)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "DataProcessing", "SomeBlock",
                        cachepath=self.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    def verifySubscription(inputFileset, workflow, expectedType, expectedAlgo):
        # Load the subscription joining fileset and workflow and check
        # its type and splitting algorithm.
        subscription = Subscription(fileset=inputFileset, workflow=workflow)
        subscription.loadData()
        self.assertEqual(subscription["type"], expectedType,
                         "Error: Wrong subscription type.")
        self.assertEqual(subscription["split_algo"], expectedAlgo,
                         "Error: Wrong split algo.")

    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing")
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    #
    # outputs of the processing task
    #
    outputModules = ["RECOoutput", "ALCARECOoutput"]
    for moduleName in outputModules:
        mapEntry = procWorkflow.outputMap[moduleName][0]
        merged = mapEntry["merged_output_fileset"]
        unmerged = mapEntry["output_fileset"]

        merged.loadData()
        unmerged.loadData()

        # a transient module is never merged, so its "merged" fileset
        # is simply the unmerged one
        if moduleName in arguments["TransientOutputModules"]:
            expectedMerged = "/TestWorkload/DataProcessing/unmerged-%s" % moduleName
        else:
            expectedMerged = "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % moduleName
        self.assertEqual(merged.name, expectedMerged,
                         "Error: Merged output fileset is wrong: %s" % merged.name)
        self.assertEqual(unmerged.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % moduleName,
                         "Error: Unmerged output fileset is wrong.")

    logArchEntry = procWorkflow.outputMap["logArchive"][0]
    logArchFilesets = [logArchEntry["merged_output_fileset"],
                       logArchEntry["output_fileset"]]
    for logArchFileset in logArchFilesets:
        logArchFileset.loadData()
    for logArchFileset in logArchFilesets:
        self.assertEqual(logArchFileset.name,
                         "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    #
    # outputs of the merge tasks
    #
    for moduleName in outputModules:
        if moduleName in arguments["TransientOutputModules"]:
            # No merge for this output module
            continue

        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % moduleName)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedEntry = mergeWorkflow.outputMap["Merged"][0]
        mergedMerge = mergedEntry["merged_output_fileset"]
        unmergedMerge = mergedEntry["output_fileset"]

        mergedMerge.loadData()
        unmergedMerge.loadData()

        self.assertEqual(
            mergedMerge.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % moduleName,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMerge.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % moduleName,
            "Error: Unmerged output fileset is wrong.")

        mergeLogEntry = mergeWorkflow.outputMap["logArchive"][0]
        mergeLogArch = mergeLogEntry["merged_output_fileset"]
        mergeUnmergedLogArch = mergeLogEntry["output_fileset"]
        mergeLogArch.loadData()
        mergeUnmergedLogArch.loadData()

        self.assertEqual(
            mergeLogArch.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % moduleName,
            "Error: LogArchive output fileset is wrong: %s" % mergeLogArch.name)
        self.assertEqual(
            mergeUnmergedLogArch.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % moduleName,
            "Error: LogArchive output fileset is wrong.")

    #
    # subscriptions
    #
    inputFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    inputFileset.loadData()
    verifySubscription(inputFileset, procWorkflow, "Processing", "LumiBased")

    recoUnmerged = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    recoUnmerged.loadData()
    recoMerge = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    # No merge workflow should exist in WMBS
    self.assertRaises(IndexError, recoMerge.load)

    alcaUnmerged = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
    alcaUnmerged.loadData()
    alcaMerge = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
    alcaMerge.load()
    verifySubscription(alcaUnmerged, alcaMerge, "Merge", "ParentlessMergeBySize")

    for moduleName in ["RECOoutput", "ALCARECOoutput"]:
        unmergedFileset = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-%s" % moduleName)
        unmergedFileset.loadData()
        cleanup = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % moduleName)
        cleanup.load()
        verifySubscription(unmergedFileset, cleanup,
                           "Cleanup", "SiblingProcessingBased")

    # log collection for the processing task ...
    logFileset = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-logArchive")
    logFileset.loadData()
    logWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/DataProcessing/LogCollect")
    logWorkflow.load()
    verifySubscription(logFileset, logWorkflow, "LogCollect", "MinFileBased")

    # ... and for the ALCARECO merge task
    logFileset = Fileset(
        name="/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive")
    logFileset.loadData()
    logWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect")
    logWorkflow.load()
    verifySubscription(logFileset, logWorkflow, "LogCollect", "MinFileBased")

    return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions for
    the processing of runs/streams.

    Arguments:
      tier0Config    -- configuration object; its Streams section is looked
                        up by stream name and its Global section supplies
                        ScramArches, DefaultScramArch, DQMDataTier and
                        AcquisitionEra
      run            -- run number (formatted with %d)
      stream         -- stream name
      specDirectory  -- passed to WMBSHelper as its cachepath
      dqmUploadProxy -- passed to the Express spec as 'DQMUploadProxy'

    Runs without an HLT key (local runs) are left untouched.

    Raises RuntimeError if a stream that is not ignored has no datasets,
    or if anything fails inside the database transaction (the transaction
    is rolled back and the original exception is logged first).
    """
    logging.debug("configureRunStream() : %d , %s", run, stream)
    myThread = threading.current_thread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

    # only centralDAQ or miniDAQ runs (have an HLT key) are configured,
    # nothing is done for local runs
    if runInfo['hltkey'] is None:
        return

    # streams not explicitly configured are repacked
    if stream not in tier0Config.Streams.dictionary_():
        addRepackConfig(tier0Config, stream)

    streamConfig = tier0Config.Streams.dictionary_()[stream]
    processingStyle = streamConfig.ProcessingStyle

    # only Bulk (repack) and Express streams get a workflow
    createsWorkflow = processingStyle in ('Bulk', 'Express')

    # consistency check to make sure stream exists and has datasets defined
    # only run if we don't ignore the stream
    if processingStyle != "Ignore":
        getStreamDatasetsDAO = daoFactory(classname="RunConfig.GetStreamDatasets")
        datasets = getStreamDatasetsDAO.execute(run, stream, transaction=False)
        if len(datasets) == 0:
            raise RuntimeError("Stream is not defined in HLT menu or has no datasets !")

    # write stream/dataset mapping (for special express and error datasets)
    insertDatasetDAO = daoFactory(classname="RunConfig.InsertPrimaryDataset")
    insertStreamDatasetDAO = daoFactory(classname="RunConfig.InsertStreamDataset")

    # write stream configuration
    insertCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
    insertStreamStyleDAO = daoFactory(classname="RunConfig.InsertStreamStyle")
    insertRepackConfigDAO = daoFactory(classname="RunConfig.InsertRepackConfig")
    insertPromptCalibrationDAO = daoFactory(classname="RunConfig.InsertPromptCalibration")
    insertExpressConfigDAO = daoFactory(classname="RunConfig.InsertExpressConfig")
    insertSpecialDatasetDAO = daoFactory(classname="RunConfig.InsertSpecialDataset")
    insertDatasetScenarioDAO = daoFactory(classname="RunConfig.InsertDatasetScenario")
    insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
    insertRecoReleaseConfigDAO = daoFactory(classname="RunConfig.InsertRecoReleaseConfig")
    insertWorkflowMonitoringDAO = daoFactory(classname="RunConfig.InsertWorkflowMonitoring")
    insertStorageNodeDAO = daoFactory(classname="RunConfig.InsertStorageNode")
    insertPhEDExConfigDAO = daoFactory(classname="RunConfig.InsertPhEDExConfig")

    bindsCMSSWVersion = []
    bindsDataset = []
    bindsStreamDataset = []
    bindsStreamStyle = {'RUN': run,
                        'STREAM': stream,
                        'STYLE': processingStyle}
    bindsRepackConfig = {}
    bindsPromptCalibration = {}
    bindsExpressConfig = {}
    bindsSpecialDataset = {}
    bindsDatasetScenario = []
    bindsStorageNode = []
    bindsPhEDExConfig = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
    markWorkflowsInjectedDAO = wmbsDaoFactory(classname="Workflow.MarkInjectedWorkflows")

    #
    # for spec creation, details for all outputs
    #
    outputModuleDetails = []

    #
    # special dataset for some express output
    #
    specialDataset = None

    #
    # for PromptReco delay settings
    #
    promptRecoDelay = {}
    promptRecoDelayOffset = {}

    #
    # for PhEDEx subscription settings
    #
    subscriptions = []

    # some hardcoded PhEDEx defaults
    expressPhEDExInjectNode = "T2_CH_CERN"
    expressPhEDExSubscribeNode = "T2_CH_CERN"

    #
    # first take care of all stream settings
    #
    getStreamOnlineVersionDAO = daoFactory(classname="RunConfig.GetStreamOnlineVersion")
    onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction=False)

    if processingStyle == "Bulk":

        streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

        bindsCMSSWVersion.append({'VERSION': streamConfig.Repack.CMSSWVersion})

        streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion,
                                                                           tier0Config.Global.DefaultScramArch)

        bindsRepackConfig = {'RUN': run,
                             'STREAM': stream,
                             'PROC_VER': streamConfig.Repack.ProcessingVersion,
                             'MAX_SIZE_SINGLE_LUMI': streamConfig.Repack.MaxSizeSingleLumi,
                             'MAX_SIZE_MULTI_LUMI': streamConfig.Repack.MaxSizeMultiLumi,
                             'MIN_SIZE': streamConfig.Repack.MinInputSize,
                             'MAX_SIZE': streamConfig.Repack.MaxInputSize,
                             'MAX_EDM_SIZE': streamConfig.Repack.MaxEdmSize,
                             'MAX_OVER_SIZE': streamConfig.Repack.MaxOverSize,
                             'MAX_EVENTS': streamConfig.Repack.MaxInputEvents,
                             'MAX_FILES': streamConfig.Repack.MaxInputFiles,
                             'BLOCK_DELAY': streamConfig.Repack.BlockCloseDelay,
                             'CMSSW': streamConfig.Repack.CMSSWVersion,
                             'SCRAM_ARCH': streamConfig.Repack.ScramArch}

    elif processingStyle == "Express":

        specialDataset = "Stream%s" % stream
        bindsDataset.append({'PRIMDS': specialDataset})
        bindsStreamDataset.append({'RUN': run,
                                   'PRIMDS': specialDataset,
                                   'STREAM': stream})
        bindsSpecialDataset = {'STREAM': stream,
                               'PRIMDS': specialDataset}
        bindsDatasetScenario.append({'RUN': run,
                                     'PRIMDS': specialDataset,
                                     'SCENARIO': streamConfig.Express.Scenario})

        if streamConfig.Express.WriteDQM:
            outputModuleDetails.append({'dataTier': tier0Config.Global.DQMDataTier,
                                        'eventContent': tier0Config.Global.DQMDataTier,
                                        'primaryDataset': specialDataset})

        bindsStorageNode.append({'NODE': expressPhEDExSubscribeNode})

        bindsPhEDExConfig.append({'RUN': run,
                                  'PRIMDS': specialDataset,
                                  'ARCHIVAL_NODE': None,
                                  'TAPE_NODE': None,
                                  'DISK_NODE': expressPhEDExSubscribeNode})

        subscriptions.append({'custodialSites': [],
                              'nonCustodialSites': [expressPhEDExSubscribeNode],
                              'autoApproveSites': [expressPhEDExSubscribeNode],
                              'priority': "high",
                              'primaryDataset': specialDataset})

        alcaSkim = None
        if len(streamConfig.Express.AlcaSkims) > 0:
            outputModuleDetails.append({'dataTier': "ALCARECO",
                                        'eventContent': "ALCARECO",
                                        'primaryDataset': specialDataset})
            alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

            # prompt calibration is only configured when at least one
            # of the skims is a PromptCalibProd producer
            numPromptCalibProd = sum(1 for producer in streamConfig.Express.AlcaSkims
                                     if producer.startswith("PromptCalibProd"))
            if numPromptCalibProd > 0:
                bindsPromptCalibration = {'RUN': run,
                                          'STREAM': stream,
                                          'NUM_PRODUCER': numPromptCalibProd}

        dqmSeq = None
        if len(streamConfig.Express.DqmSequences) > 0:
            dqmSeq = ",".join(streamConfig.Express.DqmSequences)

        streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

        bindsCMSSWVersion.append({'VERSION': streamConfig.Express.CMSSWVersion})

        streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion,
                                                                            tier0Config.Global.DefaultScramArch)

        streamConfig.Express.RecoScramArch = None
        if streamConfig.Express.RecoCMSSWVersion is not None:

            bindsCMSSWVersion.append({'VERSION': streamConfig.Express.RecoCMSSWVersion})

            streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion,
                                                                                    tier0Config.Global.DefaultScramArch)

        bindsExpressConfig = {'RUN': run,
                              'STREAM': stream,
                              'PROC_VER': streamConfig.Express.ProcessingVersion,
                              'WRITE_TIERS': ",".join(streamConfig.Express.DataTiers),
                              'WRITE_DQM': streamConfig.Express.WriteDQM,
                              'GLOBAL_TAG': streamConfig.Express.GlobalTag,
                              'MAX_RATE': streamConfig.Express.MaxInputRate,
                              'MAX_EVENTS': streamConfig.Express.MaxInputEvents,
                              'MAX_SIZE': streamConfig.Express.MaxInputSize,
                              'MAX_FILES': streamConfig.Express.MaxInputFiles,
                              'MAX_LATENCY': streamConfig.Express.MaxLatency,
                              'DQM_INTERVAL': streamConfig.Express.PeriodicHarvestInterval,
                              'BLOCK_DELAY': streamConfig.Express.BlockCloseDelay,
                              'CMSSW': streamConfig.Express.CMSSWVersion,
                              'SCRAM_ARCH': streamConfig.Express.ScramArch,
                              'RECO_CMSSW': streamConfig.Express.RecoCMSSWVersion,
                              'RECO_SCRAM_ARCH': streamConfig.Express.RecoScramArch,
                              'MULTICORE': streamConfig.Express.Multicore,
                              'ALCA_SKIM': alcaSkim,
                              'DQM_SEQ': dqmSeq}

    #
    # then configure datasets
    #
    getStreamDatasetTriggersDAO = daoFactory(classname="RunConfig.GetStreamDatasetTriggers")
    datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction=False)

    for dataset, paths in datasetTriggers.items():

        # hardcoded exceptions: skip the "Unassigned path" dataset
        # for these specific run/stream combinations
        if dataset == "Unassigned path":
            if stream == "Express" and run in [210114, 210116, 210120, 210121, 210178]:
                continue
            if stream == "A" and run in [216120, 216125, 216130]:
                continue

        datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

        selectEvents = ["%s:%s" % (path, runInfo['process']) for path in sorted(paths)]

        if processingStyle == "Bulk":

            promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
            promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

            outputModuleDetails.append({'dataTier': "RAW",
                                        'eventContent': "ALL",
                                        'selectEvents': selectEvents,
                                        'primaryDataset': dataset})

            bindsPhEDExConfig.append({'RUN': run,
                                      'PRIMDS': dataset,
                                      'ARCHIVAL_NODE': datasetConfig.ArchivalNode,
                                      'TAPE_NODE': datasetConfig.TapeNode,
                                      'DISK_NODE': datasetConfig.DiskNode})

            custodialSites = []
            nonCustodialSites = []
            autoApproveSites = []
            if datasetConfig.ArchivalNode is not None:
                bindsStorageNode.append({'NODE': datasetConfig.ArchivalNode})
                custodialSites.append(datasetConfig.ArchivalNode)
                autoApproveSites.append(datasetConfig.ArchivalNode)
            if datasetConfig.TapeNode is not None:
                bindsStorageNode.append({'NODE': datasetConfig.TapeNode})
                custodialSites.append(datasetConfig.TapeNode)
            if datasetConfig.DiskNode is not None:
                bindsStorageNode.append({'NODE': datasetConfig.DiskNode})
                nonCustodialSites.append(datasetConfig.DiskNode)
                autoApproveSites.append(datasetConfig.DiskNode)

            if custodialSites or nonCustodialSites:
                subscriptions.append({'custodialSites': custodialSites,
                                      'custodialSubType': "Replica",
                                      'nonCustodialSites': nonCustodialSites,
                                      'autoApproveSites': autoApproveSites,
                                      'priority': "high",
                                      'primaryDataset': dataset,
                                      'dataTier': "RAW"})

            #
            # set subscriptions for error dataset
            #
            custodialSites = []
            nonCustodialSites = []
            autoApproveSites = []
            if datasetConfig.ArchivalNode is not None:
                custodialSites.append(datasetConfig.ArchivalNode)
                autoApproveSites.append(datasetConfig.ArchivalNode)
            if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                nonCustodialSites.append(expressPhEDExInjectNode)
                autoApproveSites.append(expressPhEDExInjectNode)

            if custodialSites or nonCustodialSites:
                subscriptions.append({'custodialSites': custodialSites,
                                      'custodialSubType': "Replica",
                                      'nonCustodialSites': nonCustodialSites,
                                      'autoApproveSites': autoApproveSites,
                                      'priority': "high",
                                      'primaryDataset': "%s-Error" % dataset,
                                      'dataTier': "RAW"})

        elif processingStyle == "Express":

            # ALCARECO and DQM style tiers are written to the special
            # dataset (see stream settings above), not to this dataset
            for dataTier in streamConfig.Express.DataTiers:
                if dataTier not in ["ALCARECO", "DQM", "DQMIO"]:
                    outputModuleDetails.append({'dataTier': dataTier,
                                                'eventContent': dataTier,
                                                'selectEvents': selectEvents,
                                                'primaryDataset': dataset})

            bindsPhEDExConfig.append({'RUN': run,
                                      'PRIMDS': dataset,
                                      'ARCHIVAL_NODE': None,
                                      'TAPE_NODE': None,
                                      'DISK_NODE': expressPhEDExSubscribeNode})

            subscriptions.append({'custodialSites': [],
                                  'nonCustodialSites': [expressPhEDExSubscribeNode],
                                  'autoApproveSites': [expressPhEDExSubscribeNode],
                                  'priority': "high",
                                  'primaryDataset': dataset})

    #
    # finally create WMSpec
    #
    if processingStyle == "Bulk":

        taskName = "Repack"
        workflowName = "Repack_Run%d_Stream%s" % (run, stream)

        specArguments = {}

        specArguments['TimePerEvent'] = 1
        specArguments['SizePerEvent'] = 200
        specArguments['Memory'] = 1800

        specArguments['RequestPriority'] = 0

        specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
        specArguments['ScramArch'] = streamConfig.Repack.ScramArch

        specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
        specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
        specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
        specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
        specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
        specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
        specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
        specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
        specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

        specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
        if runInfo['backfill']:
            specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                        runInfo['bulk_data_type'])
        else:
            specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

        specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

    elif processingStyle == "Express":

        taskName = "Express"
        workflowName = "Express_Run%d_Stream%s" % (run, stream)

        specArguments = {}

        specArguments['TimePerEvent'] = 12
        specArguments['SizePerEvent'] = 512
        specArguments['Memory'] = 1800

        # memory request scales with the number of cores
        if streamConfig.Express.Multicore:
            specArguments['Multicore'] = streamConfig.Express.Multicore
            specArguments['Memory'] = 1800 * streamConfig.Express.Multicore

        specArguments['RequestPriority'] = 0

        specArguments['ProcessingString'] = "Express"
        specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
        specArguments['Scenario'] = streamConfig.Express.Scenario

        specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
        specArguments['ScramArch'] = streamConfig.Express.ScramArch
        specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
        specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch

        specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
        specArguments['GlobalTagTransaction'] = "Express_%d" % run
        specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect

        specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
        specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
        specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
        specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
        specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
        specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
        specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
        specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
        specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
        specArguments['DQMUploadProxy'] = dqmUploadProxy
        specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
        specArguments['StreamName'] = stream
        specArguments['SpecialDataset'] = specialDataset

        specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
        if runInfo['backfill']:
            specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill']
        else:
            specArguments['MergedLFNBase'] = "/store/express"

        specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval

        specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

    if createsWorkflow:

        specArguments['RunNumber'] = run
        specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
        specArguments['Outputs'] = outputModuleDetails
        specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
        specArguments['ValidStatus'] = "VALID"

        specArguments['SiteWhitelist'] = ["T2_CH_CERN_T0"]
        specArguments['SiteBlacklist'] = []

        # repack and express only differ in the workload factory
        # and in the PhEDEx injection node
        if processingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            phedexInjectNode = runInfo['bulk_data_loc']
        else:
            factory = ExpressWorkloadFactory()
            phedexInjectNode = expressPhEDExInjectNode

        wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
        wmSpec.setPhEDExInjectionOverride(phedexInjectNode)
        for subscription in subscriptions:
            wmSpec.setSubscriptionInformation(**subscription)

        wmSpec.setOwnerDetails("*****@*****.**", "T0",
                               {'vogroup': 'DEFAULT',
                                'vorole': 'DEFAULT',
                                'dn': "*****@*****.**"})

        wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                          maxVSize=10485760,
                                          softTimeout=604800,
                                          gracePeriod=3600)

        wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)

    filesetName = "Run%d_Stream%s" % (run, stream)
    fileset = Fileset(filesetName)

    #
    # create workflow (currently either repack or express)
    #
    try:
        myThread.transaction.begin()
        if bindsCMSSWVersion:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn=myThread.transaction.conn, transaction=True)
        if bindsDataset:
            insertDatasetDAO.execute(bindsDataset, conn=myThread.transaction.conn, transaction=True)
        if bindsStreamDataset:
            insertStreamDatasetDAO.execute(bindsStreamDataset, conn=myThread.transaction.conn, transaction=True)
        if bindsRepackConfig:
            insertRepackConfigDAO.execute(bindsRepackConfig, conn=myThread.transaction.conn, transaction=True)
        if bindsPromptCalibration:
            insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn=myThread.transaction.conn, transaction=True)
        if bindsExpressConfig:
            insertExpressConfigDAO.execute(bindsExpressConfig, conn=myThread.transaction.conn, transaction=True)
        if bindsSpecialDataset:
            insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn=myThread.transaction.conn, transaction=True)
        if bindsDatasetScenario:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn=myThread.transaction.conn, transaction=True)
        if bindsStorageNode:
            insertStorageNodeDAO.execute(bindsStorageNode, conn=myThread.transaction.conn, transaction=True)
        if bindsPhEDExConfig:
            insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn=myThread.transaction.conn, transaction=True)
        insertStreamStyleDAO.execute(bindsStreamStyle, conn=myThread.transaction.conn, transaction=True)
        if createsWorkflow:
            insertStreamFilesetDAO.execute(run, stream, filesetName, conn=myThread.transaction.conn, transaction=True)
            fileset.load()
            wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose=True)
            insertWorkflowMonitoringDAO.execute([fileset.id], conn=myThread.transaction.conn, transaction=True)
        if processingStyle == "Bulk":
            bindsRecoReleaseConfig = []
            # loop variable deliberately not called "fileset", that
            # name is the input Fileset object loaded above
            for mergedFileset, primds in wmbsHelper.getMergeOutputMapping().items():
                bindsRecoReleaseConfig.append({'RUN': run,
                                               'PRIMDS': primds,
                                               'FILESET': mergedFileset,
                                               'RECODELAY': promptRecoDelay[primds],
                                               'RECODELAYOFFSET': promptRecoDelayOffset[primds]})
            insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn=myThread.transaction.conn, transaction=True)
        elif processingStyle == "Express":
            markWorkflowsInjectedDAO.execute([workflowName], injected=True, conn=myThread.transaction.conn, transaction=True)
    except Exception as ex:
        # log the real cause before it is replaced by the generic error
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError("Problem in configureRunStream() database transaction !")
    else:
        myThread.transaction.commit()

    return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy): """ _configureRunStream_ Called by Tier0Feeder for new run/streams. Retrieve global run settings and build the part of the configuration relevant to run/stream and write it to the database. Create workflows, filesets and subscriptions for the processing of runs/streams. """ logging.debug("configureRunStream() : %d , %s" % (run, stream)) myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) # retrieve some basic run information getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo") runInfo = getRunInfoDAO.execute(run, transaction = False)[0] # # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs # if runInfo['hltkey'] != None: # streams not explicitely configured are repacked if stream not in tier0Config.Streams.dictionary_().keys(): addRepackConfig(tier0Config, stream) streamConfig = tier0Config.Streams.dictionary_()[stream] # consistency check to make sure stream exists and has datasets defined # only run if we don't ignore the stream if streamConfig.ProcessingStyle != "Ignore": getStreamDatasetsDAO = daoFactory(classname = "RunConfig.GetStreamDatasets") datasets = getStreamDatasetsDAO.execute(run, stream, transaction = False) if len(datasets) == 0: raise RuntimeError("Stream is not defined in HLT menu or has no datasets !") # write stream/dataset mapping (for special express and error datasets) insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset") insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset") # write stream configuration insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion") insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle") insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig") insertPromptCalibrationDAO = daoFactory(classname = 
"RunConfig.InsertPromptCalibration") insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig") insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset") insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario") insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset") insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig") insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring") insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode") insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig") bindsCMSSWVersion = [] bindsDataset = [] bindsStreamDataset = [] bindsStreamStyle = {'RUN' : run, 'STREAM' : stream, 'STYLE': streamConfig.ProcessingStyle } bindsRepackConfig = {} bindsPromptCalibration = {} bindsExpressConfig = {} bindsSpecialDataset = {} bindsDatasetScenario = [] bindsStorageNode = [] bindsPhEDExConfig = [] # mark workflows as injected wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows") # # for spec creation, details for all outputs # outputModuleDetails = [] # # special dataset for some express output # specialDataset = None # # for PromptReco delay settings # promptRecoDelay = {} promptRecoDelayOffset = {} # # for PhEDEx subscription settings # subscriptions = [] # some hardcoded PhEDEx defaults expressPhEDExInjectNode = "T2_CH_CERN" expressPhEDExSubscribeNode = "T2_CH_CERN" # # first take care of all stream settings # getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion") onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False) if streamConfig.ProcessingStyle == "Bulk": streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, 
onlineVersion) bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Repack.CMSSWVersion } ) streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion, tier0Config.Global.DefaultScramArch) bindsRepackConfig = { 'RUN' : run, 'STREAM' : stream, 'PROC_VER': streamConfig.Repack.ProcessingVersion, 'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi, 'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi, 'MIN_SIZE' : streamConfig.Repack.MinInputSize, 'MAX_SIZE' : streamConfig.Repack.MaxInputSize, 'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize, 'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize, 'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents, 'MAX_FILES' : streamConfig.Repack.MaxInputFiles, 'BLOCK_DELAY' : streamConfig.Repack.BlockCloseDelay, 'CMSSW' : streamConfig.Repack.CMSSWVersion, 'SCRAM_ARCH' : streamConfig.Repack.ScramArch } elif streamConfig.ProcessingStyle == "Express": specialDataset = "Stream%s" % stream bindsDataset.append( { 'PRIMDS' : specialDataset } ) bindsStreamDataset.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'STREAM' : stream } ) bindsSpecialDataset = { 'STREAM' : stream, 'PRIMDS' : specialDataset } bindsDatasetScenario.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'SCENARIO' : streamConfig.Express.Scenario } ) if "DQM" in streamConfig.Express.DataTiers: outputModuleDetails.append( { 'dataTier' : "DQM", 'eventContent' : "DQM", 'primaryDataset' : specialDataset } ) bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : specialDataset, 'ARCHIVAL_NODE' : None, 'TAPE_NODE' : None, 'DISK_NODE' : expressPhEDExSubscribeNode } ) subscriptions.append( { 'custodialSites' : [], 'nonCustodialSites' : [ expressPhEDExSubscribeNode ], 'autoApproveSites' : [ expressPhEDExSubscribeNode ], 'priority' : "high", 'primaryDataset' : specialDataset } ) alcaSkim = None if "ALCARECO" in streamConfig.Express.DataTiers: if 
len(streamConfig.Express.AlcaSkims) > 0: outputModuleDetails.append( { 'dataTier' : "ALCARECO", 'eventContent' : "ALCARECO", 'primaryDataset' : specialDataset } ) alcaSkim = ",".join(streamConfig.Express.AlcaSkims) numPromptCalibProd = 0 for producer in streamConfig.Express.AlcaSkims: if producer.startswith("PromptCalibProd"): numPromptCalibProd += 1 if numPromptCalibProd > 0: bindsPromptCalibration = { 'RUN' : run, 'STREAM' : stream, 'NUM_PRODUCER' : numPromptCalibProd } dqmSeq = None if len(streamConfig.Express.DqmSequences) > 0: dqmSeq = ",".join(streamConfig.Express.DqmSequences) streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion) bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.CMSSWVersion } ) streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion, tier0Config.Global.DefaultScramArch) streamConfig.Express.RecoScramArch = None if streamConfig.Express.RecoCMSSWVersion != None: bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.RecoCMSSWVersion } ) streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion, tier0Config.Global.DefaultScramArch) bindsExpressConfig = { 'RUN' : run, 'STREAM' : stream, 'PROC_VER' : streamConfig.Express.ProcessingVersion, 'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers), 'GLOBAL_TAG' : streamConfig.Express.GlobalTag, 'MAX_RATE' : streamConfig.Express.MaxInputRate, 'MAX_EVENTS' : streamConfig.Express.MaxInputEvents, 'MAX_SIZE' : streamConfig.Express.MaxInputSize, 'MAX_FILES' : streamConfig.Express.MaxInputFiles, 'MAX_LATENCY' : streamConfig.Express.MaxLatency, 'DQM_INTERVAL' : streamConfig.Express.PeriodicHarvestInterval, 'BLOCK_DELAY' : streamConfig.Express.BlockCloseDelay, 'CMSSW' : streamConfig.Express.CMSSWVersion, 'SCRAM_ARCH' : streamConfig.Express.ScramArch, 'RECO_CMSSW' : streamConfig.Express.RecoCMSSWVersion, 'RECO_SCRAM_ARCH' : 
streamConfig.Express.RecoScramArch, 'MULTICORE' : streamConfig.Express.Multicore, 'ALCA_SKIM' : alcaSkim, 'DQM_SEQ' : dqmSeq } # # then configure datasets # getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers") datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False) for dataset, paths in datasetTriggers.items(): if dataset == "Unassigned path": if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]: continue if stream == "A" and run in [ 216120, 216125, 216130 ]: continue datasetConfig = retrieveDatasetConfig(tier0Config, dataset) selectEvents = [] for path in sorted(paths): selectEvents.append("%s:%s" % (path, runInfo['process'])) if streamConfig.ProcessingStyle == "Bulk": promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset outputModuleDetails.append( { 'dataTier' : "RAW", 'eventContent' : "ALL", 'selectEvents' : selectEvents, 'primaryDataset' : dataset } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : dataset, 'ARCHIVAL_NODE' : datasetConfig.ArchivalNode, 'TAPE_NODE' : datasetConfig.TapeNode, 'DISK_NODE' : datasetConfig.DiskNode } ) custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } ) custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.TapeNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.TapeNode } ) custodialSites.append(datasetConfig.TapeNode) if datasetConfig.DiskNode != None: bindsStorageNode.append( { 'NODE' : datasetConfig.DiskNode } ) nonCustodialSites.append(datasetConfig.DiskNode) autoApproveSites.append(datasetConfig.DiskNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append( { 'custodialSites' : custodialSites, 'custodialSubType' : "Replica", 
'nonCustodialSites' : nonCustodialSites, 'autoApproveSites' : autoApproveSites, 'priority' : "high", 'primaryDataset' : dataset, 'dataTier' : "RAW" } ) # # set subscriptions for error dataset # custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.ArchivalNode != expressPhEDExInjectNode: nonCustodialSites.append(expressPhEDExInjectNode) autoApproveSites.append(expressPhEDExInjectNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append( { 'custodialSites' : custodialSites, 'custodialSubType' : "Replica", 'nonCustodialSites' : nonCustodialSites, 'autoApproveSites' : autoApproveSites, 'priority' : "high", 'primaryDataset' : "%s-Error" % dataset, 'dataTier' : "RAW" } ) elif streamConfig.ProcessingStyle == "Express": for dataTier in streamConfig.Express.DataTiers: if dataTier not in [ "ALCARECO", "DQM" ]: outputModuleDetails.append( { 'dataTier' : dataTier, 'eventContent' : dataTier, 'selectEvents' : selectEvents, 'primaryDataset' : dataset } ) bindsPhEDExConfig.append( { 'RUN' : run, 'PRIMDS' : dataset, 'ARCHIVAL_NODE' : None, 'TAPE_NODE' : None, 'DISK_NODE' : expressPhEDExSubscribeNode } ) subscriptions.append( { 'custodialSites' : [], 'nonCustodialSites' : [ expressPhEDExSubscribeNode ], 'autoApproveSites' : [ expressPhEDExSubscribeNode ], 'priority' : "high", 'primaryDataset' : dataset } ) # # finally create WMSpec # outputs = {} if streamConfig.ProcessingStyle == "Bulk": taskName = "Repack" workflowName = "Repack_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['Group'] = "unknown" specArguments['Requestor'] = "unknown" specArguments['RequestorDN'] = "unknown" specArguments['TimePerEvent'] = 1 specArguments['SizePerEvent'] = 200 specArguments['Memory'] = 1800 specArguments['RequestPriority'] = 0 specArguments['CMSSWVersion'] = 
streamConfig.Repack.CMSSWVersion specArguments['ScramArch'] = streamConfig.Repack.ScramArch specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type'] if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'], runInfo['bulk_data_type']) else: specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type'] specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay elif streamConfig.ProcessingStyle == "Express": taskName = "Express" workflowName = "Express_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['Group'] = "unknown" specArguments['Requestor'] = "unknown" specArguments['RequestorDN'] = "unknown" specArguments['TimePerEvent'] = 12 specArguments['SizePerEvent'] = 512 specArguments['Memory'] = 1800 if streamConfig.Express.Multicore: specArguments['Multicore'] = streamConfig.Express.Multicore specArguments['Memory'] = 1800 * streamConfig.Express.Multicore specArguments['RequestPriority'] = 0 specArguments['ProcessingString'] = "Express" specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion specArguments['Scenario'] = streamConfig.Express.Scenario specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion specArguments['ScramArch'] = streamConfig.Express.ScramArch specArguments['RecoCMSSWVersion'] = 
streamConfig.Express.RecoCMSSWVersion specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch specArguments['GlobalTag'] = streamConfig.Express.GlobalTag specArguments['GlobalTagTransaction'] = "Express_%d" % run specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles specArguments['MaxLatency'] = streamConfig.Express.MaxLatency specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims specArguments['DqmSequences'] = streamConfig.Express.DqmSequences specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout'] specArguments['AlcaHarvestDir'] = runInfo['ah_dir'] specArguments['DQMUploadProxy'] = dqmUploadProxy specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl'] specArguments['StreamName'] = stream specArguments['SpecialDataset'] = specialDataset specArguments['UnmergedLFNBase'] = "/store/unmerged/express" specArguments['MergedLFNBase'] = "/store/express" if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill'] else: specArguments['MergedLFNBase'] = "/store/express" specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: specArguments['RunNumber'] = run specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra specArguments['Outputs'] = outputModuleDetails specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override" specArguments['ValidStatus'] = "VALID" specArguments['SiteWhitelist'] = [ "T2_CH_CERN_T0" ] specArguments['SiteBlacklist'] = [] if 
streamConfig.ProcessingStyle == "Bulk": factory = RepackWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc']) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) elif streamConfig.ProcessingStyle == "Express": factory = ExpressWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: wmSpec.setOwnerDetails("*****@*****.**", "T0", { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT', 'dn' : "*****@*****.**" } ) wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760, softTimeout = 604800, gracePeriod = 3600) wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory) filesetName = "Run%d_Stream%s" % (run, stream) fileset = Fileset(filesetName) # # create workflow (currently either repack or express) # try: myThread.transaction.begin() if len(bindsCMSSWVersion) > 0: insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True) if len(bindsDataset) > 0: insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsStreamDataset) > 0: insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsRepackConfig) > 0: insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True) if len(bindsPromptCalibration) > 0: insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True) if len(bindsExpressConfig) > 0: insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True) if len(bindsSpecialDataset) > 0: 
insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True) if len(bindsDatasetScenario) > 0: insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True) if len(bindsStorageNode) > 0: insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True) if len(bindsPhEDExConfig) > 0: insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True) insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True) if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]: insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True) fileset.load() wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True) insertWorkflowMonitoringDAO.execute([fileset.id], conn = myThread.transaction.conn, transaction = True) if streamConfig.ProcessingStyle == "Bulk": bindsRecoReleaseConfig = [] for fileset, primds in wmbsHelper.getMergeOutputMapping().items(): bindsRecoReleaseConfig.append( { 'RUN' : run, 'PRIMDS' : primds, 'FILESET' : fileset, 'RECODELAY' : promptRecoDelay[primds], 'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } ) insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True) elif streamConfig.ProcessingStyle == "Express": markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True) except Exception as ex: logging.exception(ex) myThread.transaction.rollback() raise RuntimeError("Problem in configureRunStream() database transaction !") else: myThread.transaction.commit() else: # should we do anything for local runs ? pass return
myFile.create() inputFileset.addFile(myFile) dbsFile = DBSBufferFile(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"], events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]}, locations = "cmssrm.fnal.gov", status = "LOCAL") dbsFile.setDatasetPath(datasetPath) dbsFile.setAlgorithm(appName = "cmsRun", appVer = "Unknown", appFam = "Unknown", psetHash = "Unknown", configContent = "Unknown") dbsFile.create() inputFileset.commit() inputFileset.markOpen(False) return myThread = threading.currentThread() myThread.transaction.begin() for workloadTask in workload.taskIterator(): inputFileset = Fileset(name = workloadTask.getPathName()) inputFileset.create() inputDataset = workloadTask.inputDataset() inputDatasetPath = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier) injectFilesFromDBS(inputFileset, inputDatasetPath) myWMBSHelper = WMBSHelper(workload, workloadTask.getPathName(), cachepath=os.getcwd()) myWMBSHelper.createSubscription(workloadTask, inputFileset) myThread.transaction.commit()
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Build a MonteCarloFromGEN workload, install it into WMBS through
    WMBSHelper and check the workflows, output filesets and
    subscriptions (processing, merge, cleanup and log collect) that
    were created for it.
    """
    def loadOutputPair(workflow, outputModule):
        # Return the loaded (merged, unmerged) filesets of one output module.
        outputInfo = workflow.outputMap[outputModule][0]
        merged = outputInfo["merged_output_fileset"]
        unmerged = outputInfo["output_fileset"]
        merged.loadData()
        unmerged.loadData()
        return merged, unmerged

    wfArguments = getTestArguments()
    wfArguments["ProcConfigCacheID"] = self.injectConfig()
    wfArguments["CouchDBName"] = "mclhe_t"

    workload = monteCarloFromGENWorkload("TestWorkload", wfArguments)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "MonteCarloFromGEN", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    # Top level processing workflow and its outputs.
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/MonteCarloFromGEN")
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'lheproduction')

    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for outputMod in goldenOutputMods:
        merged, unmerged = loadOutputPair(procWorkflow, outputMod)

        expectedMerged = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % outputMod
        expectedUnmerged = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % outputMod
        self.assertEqual(merged.name, expectedMerged,
                         "Error: Merged output fileset is wrong: %s" % merged.name)
        self.assertEqual(unmerged.name, expectedUnmerged,
                         "Error: Unmerged output fileset is wrong.")

    # Both logArchive entries of the processing task are expected to
    # carry the same (unmerged) fileset name.
    procLogs, procUnmergedLogs = loadOutputPair(procWorkflow, "logArchive")
    for logFileset in (procLogs, procUnmergedLogs):
        self.assertEqual(logFileset.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    # Merge workflows, one per processing output module.
    for outputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % outputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        merged, unmerged = loadOutputPair(mergeWorkflow, "Merged")
        expectedMerged = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % outputMod
        expectedUnmerged = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % outputMod
        self.assertEqual(merged.name, expectedMerged,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmerged.name, expectedUnmerged,
                         "Error: Unmerged output fileset is wrong.")

        mergeLogs, mergeUnmergedLogs = loadOutputPair(mergeWorkflow, "logArchive")
        expectedLogs = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % outputMod
        expectedUnmergedLogs = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % outputMod
        self.assertEqual(mergeLogs.name, expectedLogs,
                         "Error: LogArchive output fileset is wrong: %s" % mergeLogs.name)
        self.assertEqual(mergeUnmergedLogs.name, expectedUnmergedLogs,
                         "Error: LogArchive output fileset is wrong.")

    # Processing subscription on the top level fileset.
    inputFileset = Fileset(name="TestWorkload-MonteCarloFromGEN-SomeBlock")
    inputFileset.loadData()

    procSub = Subscription(fileset=inputFileset, workflow=procWorkflow)
    procSub.loadData()

    self.assertEqual(procSub["type"], "Production",
                     "Error: Wrong subscription type: %s" % procSub["type"])
    self.assertEqual(procSub["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions: (unmerged fileset name, merge task path).
    mergeChecks = [
        ("/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO"),
        ("/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO"),
    ]
    for filesetName, taskPath in mergeChecks:
        unmergedFileset = Fileset(name=filesetName)
        unmergedFileset.loadData()
        mergeWf = Workflow(name="TestWorkload", task=taskPath)
        mergeWf.load()
        mergeSub = Subscription(fileset=unmergedFileset, workflow=mergeWf)
        mergeSub.loadData()

        self.assertEqual(mergeSub["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSub["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSub["split_algo"])

    # Cleanup subscriptions on the unmerged processing outputs.
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmergedFileset = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
        unmergedFileset.loadData()
        cleanupWf = Workflow(name="TestWorkload",
                             task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
        cleanupWf.load()
        cleanupSub = Subscription(fileset=unmergedFileset, workflow=cleanupWf)
        cleanupSub.loadData()

        self.assertEqual(cleanupSub["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSub["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # Log collect subscriptions: (logArchive fileset name, task path).
    logCollectChecks = [
        ("/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
         "/TestWorkload/MonteCarloFromGEN/LogCollect"),
        ("/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect"),
        ("/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect"),
    ]
    for filesetName, taskPath in logCollectChecks:
        logFileset = Fileset(name=filesetName)
        logFileset.loadData()
        logCollectWf = Workflow(name="TestWorkload", task=taskPath)
        logCollectWf.load()
        logCollectSub = Subscription(fileset=logFileset, workflow=logCollectWf)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def testReReco(self):
    """
    _testReReco_

    Install a ReReco workload into WMBS and check the result.

    A ReReco workflow is a DataProcessing workflow with skims attached,
    so only the skim task, its merge/cleanup tasks and the related log
    collect subscriptions are checked here.
    """
    def loadOutputPair(workflow, outputModule):
        # Return the loaded (merged, unmerged) filesets of one output module.
        outputInfo = workflow.outputMap[outputModule][0]
        merged = outputInfo["merged_output_fileset"]
        unmerged = outputInfo["output_fileset"]
        merged.loadData()
        unmerged.loadData()
        return merged, unmerged

    skimConfigId = self.injectSkimConfig()

    rerecoArguments = getTestArguments()
    rerecoArguments["SkimConfigs"] = [
        {"SkimName": "SomeSkim",
         "SkimInput": "outputRECORECO",
         "SkimSplitAlgo": "FileBased",
         "SkimSplitArgs": {"files_per_job": 1,
                           "include_parents": True},
         "ConfigCacheID": skimConfigId,
         "Scenario": None},
    ]
    rerecoArguments["CouchURL"] = os.environ["COUCHURL"]
    rerecoArguments["CouchDBName"] = "rereco_t"

    workload = rerecoWorkload("TestWorkload", rerecoArguments)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "SomeBlock")
    helper.createSubscription()

    # The skim workflow and its outputs.
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for outputMod in goldenOutputMods:
        merged, unmerged = loadOutputPair(skimWorkflow, outputMod)

        expectedMerged = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % outputMod
        expectedUnmerged = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-%s" % outputMod
        self.assertEqual(merged.name, expectedMerged,
                         "Error: Merged output fileset is wrong: %s" % merged.name)
        self.assertEqual(unmerged.name, expectedUnmerged,
                         "Error: Unmerged output fileset is wrong: %s" % unmerged.name)

    # Both logArchive entries of the skim task are expected to carry the
    # same (unmerged) fileset name.
    skimLogs, skimUnmergedLogs = loadOutputPair(skimWorkflow, "logArchive")
    for logFileset in (skimLogs, skimUnmergedLogs):
        self.assertEqual(logFileset.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    # Merge workflows, one per skim output module.
    for outputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s" % outputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        merged, unmerged = loadOutputPair(mergeWorkflow, "Merged")
        expectedMerged = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % outputMod
        expectedUnmerged = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % outputMod
        self.assertEqual(merged.name, expectedMerged,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmerged.name, expectedUnmerged,
                         "Error: Unmerged output fileset is wrong.")

        mergeLogs, mergeUnmergedLogs = loadOutputPair(mergeWorkflow, "logArchive")
        expectedLogs = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % outputMod
        expectedUnmergedLogs = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % outputMod
        self.assertEqual(mergeLogs.name, expectedLogs,
                         "Error: LogArchive output fileset is wrong: %s" % mergeLogs.name)
        self.assertEqual(mergeUnmergedLogs.name, expectedUnmergedLogs,
                         "Error: LogArchive output fileset is wrong.")

    # Skim subscription on the merged RECO output.
    skimInput = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged")
    skimInput.loadData()

    skimSub = Subscription(fileset=skimInput, workflow=skimWorkflow)
    skimSub.loadData()

    self.assertEqual(skimSub["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSub["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    skimOutputs = ["A", "B"]

    # Merge subscriptions on the unmerged skim outputs.
    for skimOutput in skimOutputs:
        unmergedFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmergedFileset.loadData()
        mergeWf = Workflow(name="TestWorkload",
                           task="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWf.load()
        mergeSub = Subscription(fileset=unmergedFileset, workflow=mergeWf)
        mergeSub.loadData()

        self.assertEqual(mergeSub["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSub["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions on the unmerged skim outputs.
    for skimOutput in skimOutputs:
        unmergedFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmergedFileset.loadData()
        cleanupWf = Workflow(name="TestWorkload",
                             task="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWf.load()
        cleanupSub = Subscription(fileset=unmergedFileset, workflow=cleanupWf)
        cleanupSub.loadData()

        self.assertEqual(cleanupSub["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSub["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # Log collect subscriptions on the skim merge logArchive filesets.
    for skimOutput in skimOutputs:
        mergeLogFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        mergeLogFileset.loadData()
        logCollectWf = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        logCollectWf.load()
        logCollectSub = Subscription(fileset=mergeLogFileset, workflow=logCollectWf)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")
def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy):
    """
    _releasePromptReco_

    Called by Tier0Feeder

    Finds all run/primds that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay > now
      AND run.end_time > 0 )

    Create workflows and subscriptions for the processing
    of runs/datasets.

    :param tier0Config: Tier0 configuration object; passed to
        retrieveDatasetConfig() and read for Global.ScramArches,
        Global.DefaultScramArch and Global.DQMDataTier.
    :param specDirectory: passed as ``cachepath`` to WMBSHelper for
        every PromptReco spec created here.
    :param dqmUploadProxy: stored in the spec arguments as
        'DQMUploadProxy'.
    :raises RuntimeError: if anything fails inside the database
        transaction; the transaction is rolled back and the original
        exception is logged first.
    """
    logging.debug("releasePromptReco()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    insertDatasetScenarioDAO = daoFactory(classname="RunConfig.InsertDatasetScenario")
    insertCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
    insertRecoConfigDAO = daoFactory(classname="RunConfig.InsertRecoConfig")
    insertStorageNodeDAO = daoFactory(classname="RunConfig.InsertStorageNode")
    releasePromptRecoDAO = daoFactory(classname="RunConfig.ReleasePromptReco")
    insertWorkflowMonitoringDAO = daoFactory(classname="RunConfig.InsertWorkflowMonitoring")

    # The per-run lookup DAOs do not depend on the run, build them once.
    findRecoReleaseDAO = daoFactory(classname="RunConfig.FindRecoRelease")
    getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
    getPhEDExConfigDAO = daoFactory(classname="RunConfig.GetPhEDExConfig")

    bindsDatasetScenario = []
    bindsCMSSWVersion = []
    bindsRecoConfig = []
    # Never filled in this function, so the storage node insert in the
    # transaction below is currently a no-op.
    bindsStorageNode = []
    bindsReleasePromptReco = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
    markWorkflowsInjectedDAO = wmbsDaoFactory(classname="Workflow.MarkInjectedWorkflows")

    # Every PromptReco spec uses the same top level task name. It is
    # defined here because the transaction block further down needs it
    # too and must not depend on a variable leaked from the dataset loop.
    taskName = "Reco"

    #
    # for creating PromptReco specs
    #
    recoSpecs = {}

    #
    # for PhEDEx subscription settings
    #
    # NOTE(review): this list is shared by all runs/datasets, so every
    # spec receives the subscriptions accumulated so far; presumably
    # setSubscriptionInformation() filters on primaryDataset/dataTier -
    # confirm before changing.
    subscriptions = []

    recoRelease = findRecoReleaseDAO.execute(transaction=False)

    for run in sorted(recoRelease.keys()):

        # retrieve some basic run information
        runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

        # retrieve phedex configs for run
        phedexConfigs = getPhEDExConfigDAO.execute(run, transaction=False)

        for (dataset, fileset, repackProcVer) in recoRelease[run]:

            bindsReleasePromptReco.append({'RUN': run,
                                           'PRIMDS': dataset,
                                           'NOW': int(time.time())})

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            bindsDatasetScenario.append({'RUN': run,
                                         'PRIMDS': dataset,
                                         'SCENARIO': datasetConfig.Scenario})

            if datasetConfig.CMSSWVersion is not None:
                bindsCMSSWVersion.append({'VERSION': datasetConfig.CMSSWVersion})

            alcaSkim = None
            if len(datasetConfig.AlcaSkims) > 0:
                alcaSkim = ",".join(datasetConfig.AlcaSkims)

            dqmSeq = None
            if len(datasetConfig.DqmSequences) > 0:
                dqmSeq = ",".join(datasetConfig.DqmSequences)

            # Falls back to the default architecture when the CMSSW
            # version has no explicit entry in ScramArches.
            datasetConfig.ScramArch = tier0Config.Global.ScramArches.get(datasetConfig.CMSSWVersion,
                                                                         tier0Config.Global.DefaultScramArch)

            bindsRecoConfig.append({'RUN': run,
                                    'PRIMDS': dataset,
                                    'DO_RECO': int(datasetConfig.DoReco),
                                    'RECO_SPLIT': datasetConfig.RecoSplit,
                                    'WRITE_RECO': int(datasetConfig.WriteRECO),
                                    'WRITE_DQM': int(datasetConfig.WriteDQM),
                                    'WRITE_AOD': int(datasetConfig.WriteAOD),
                                    'PROC_VER': datasetConfig.ProcessingVersion,
                                    'ALCA_SKIM': alcaSkim,
                                    'DQM_SEQ': dqmSeq,
                                    'BLOCK_DELAY': datasetConfig.BlockCloseDelay,
                                    'CMSSW': datasetConfig.CMSSWVersion,
                                    'SCRAM_ARCH': datasetConfig.ScramArch,
                                    'MULTICORE': datasetConfig.Multicore,
                                    'GLOBAL_TAG': datasetConfig.GlobalTag})

            phedexConfig = phedexConfigs[dataset]
            tapeNode = phedexConfig['tape_node']
            diskNode = phedexConfig['disk_node']

            if datasetConfig.WriteAOD:

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if tapeNode is not None:
                    custodialSites.append(tapeNode)
                if diskNode is not None:
                    nonCustodialSites.append(diskNode)
                    autoApproveSites.append(diskNode)

                subscriptions.append({'custodialSites': custodialSites,
                                      'custodialSubType': "Replica",
                                      'nonCustodialSites': nonCustodialSites,
                                      'autoApproveSites': autoApproveSites,
                                      'priority': "high",
                                      'primaryDataset': dataset,
                                      'dataTier': "AOD"})

            if len(datasetConfig.AlcaSkims) > 0:
                if tapeNode is not None:
                    subscriptions.append({'custodialSites': [tapeNode],
                                          'custodialSubType': "Replica",
                                          'nonCustodialSites': [],
                                          'autoApproveSites': [],
                                          'priority': "high",
                                          'primaryDataset': dataset,
                                          'dataTier': "ALCARECO"})

            if datasetConfig.WriteDQM:
                if tapeNode is not None:
                    subscriptions.append({'custodialSites': [tapeNode],
                                          'custodialSubType': "Replica",
                                          'nonCustodialSites': [],
                                          'autoApproveSites': [],
                                          'priority': "high",
                                          'primaryDataset': dataset,
                                          'dataTier': tier0Config.Global.DQMDataTier})

            if datasetConfig.WriteRECO:
                if diskNode is not None:
                    subscriptions.append({'custodialSites': [],
                                          'nonCustodialSites': [diskNode],
                                          'autoApproveSites': [diskNode],
                                          'priority': "high",
                                          'primaryDataset': dataset,
                                          'dataTier': "RECO"})

            writeTiers = []
            if datasetConfig.WriteRECO:
                writeTiers.append("RECO")
            if datasetConfig.WriteAOD:
                writeTiers.append("AOD")
            if datasetConfig.WriteDQM:
                writeTiers.append(tier0Config.Global.DQMDataTier)
            if len(datasetConfig.AlcaSkims) > 0:
                writeTiers.append("ALCARECO")

            if datasetConfig.DoReco and writeTiers:

                #
                # create WMSpec
                #
                workflowName = "PromptReco_Run%d_%s" % (run, dataset)

                specArguments = {}

                specArguments['TimePerEvent'] = 12
                specArguments['SizePerEvent'] = 512
                specArguments['Memory'] = 1800

                if datasetConfig.Multicore:
                    # memory request scales with the number of cores
                    specArguments['Multicore'] = datasetConfig.Multicore
                    specArguments['Memory'] = 1800 * datasetConfig.Multicore

                specArguments['RequestPriority'] = 0

                specArguments['AcquisitionEra'] = runInfo['acq_era']
                specArguments['CMSSWVersion'] = datasetConfig.CMSSWVersion
                specArguments['ScramArch'] = datasetConfig.ScramArch

                specArguments['RunNumber'] = run

                specArguments['SplittingAlgo'] = "EventBased"
                specArguments['EventsPerJob'] = datasetConfig.RecoSplit

                specArguments['ProcessingString'] = "PromptReco"
                specArguments['ProcessingVersion'] = datasetConfig.ProcessingVersion
                specArguments['Scenario'] = datasetConfig.Scenario

                specArguments['GlobalTag'] = datasetConfig.GlobalTag
                specArguments['GlobalTagConnect'] = datasetConfig.GlobalTagConnect

                specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (dataset,
                                                                   runInfo['acq_era'],
                                                                   repackProcVer)

                specArguments['WriteTiers'] = writeTiers
                specArguments['AlcaSkims'] = datasetConfig.AlcaSkims
                specArguments['DqmSequences'] = datasetConfig.DqmSequences

                specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
                if runInfo['backfill']:
                    specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                               runInfo['bulk_data_type'])
                else:
                    specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

                specArguments['ValidStatus'] = "VALID"

                specArguments['EnableHarvesting'] = "True"
                specArguments['DQMUploadProxy'] = dqmUploadProxy
                specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']

                specArguments['BlockCloseDelay'] = datasetConfig.BlockCloseDelay

                specArguments['SiteWhitelist'] = datasetConfig.SiteWhitelist
                specArguments['SiteBlacklist'] = []
                specArguments['TrustSitelists'] = "True"

                factory = PromptRecoWorkloadFactory()
                wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)

                wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
                for subscription in subscriptions:
                    wmSpec.setSubscriptionInformation(**subscription)

                wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                       {'vogroup': 'DEFAULT',
                                        'vorole': 'DEFAULT',
                                        'dn': "*****@*****.**"})

                wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                                  maxVSize=10485760,
                                                  softTimeout=604800,
                                                  gracePeriod=3600)

                wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)

                recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset)

    # Everything collected above is written in a single transaction so a
    # failure leaves no partially released run/dataset behind.
    try:
        myThread.transaction.begin()
        if bindsDatasetScenario:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
        if bindsCMSSWVersion:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                          conn=myThread.transaction.conn,
                                          transaction=True)
        if bindsRecoConfig:
            insertRecoConfigDAO.execute(bindsRecoConfig,
                                        conn=myThread.transaction.conn,
                                        transaction=True)
        if bindsStorageNode:
            insertStorageNodeDAO.execute(bindsStorageNode,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        if bindsReleasePromptReco:
            releasePromptRecoDAO.execute(bindsReleasePromptReco,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        for (wmbsHelper, wmSpec, fileset) in recoSpecs.values():
            wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                          Fileset(id=fileset),
                                          alternativeFilesetClose=True)
            insertWorkflowMonitoringDAO.execute([fileset],
                                                conn=myThread.transaction.conn,
                                                transaction=True)
        if recoSpecs:
            # Pass a real list of workflow names; on Python 3 dict.keys()
            # is only a view object.
            markWorkflowsInjectedDAO.execute(list(recoSpecs),
                                             injected=True,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError("Problem in releasePromptReco() database transaction !")
    else:
        myThread.transaction.commit()

    return
def testStoreResults(self):
    """
    _testStoreResults_

    Build a StoreResults workload, install it into WMBS through the
    WMBSHelper and check the workflow output map, the output filesets
    and the subscription on the top level fileset.
    """
    specArgs = getTestArguments()
    specArgs['CmsPath'] = "/uscmst1/prod/sw/cms"

    workload = storeResultsWorkload("TestWorkload", specArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "StoreResults", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    storeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/StoreResults")
    storeWorkflow.load()

    self.assertEqual(len(storeWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # Both the "merged" and the "unmerged" entry of each output module are
    # expected to carry the same merged-<module> fileset name.
    for moduleName in ["Merged"]:
        mapEntry = storeWorkflow.outputMap[moduleName][0]
        mergedFileset = mapEntry["merged_output_fileset"]
        unmergedFileset = mapEntry["output_fileset"]

        mergedFileset.loadData()
        unmergedFileset.loadData()

        expectedName = "/TestWorkload/StoreResults/merged-%s" % moduleName
        self.assertEqual(
            mergedFileset.name, expectedName,
            "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(
            unmergedFileset.name, expectedName,
            "Error: Unmerged output fileset is wrong: %s." % unmergedFileset.name)

    logArchiveEntry = storeWorkflow.outputMap["logArchive"][0]
    mergedLogArchive = logArchiveEntry["merged_output_fileset"]
    unmergedLogArchive = logArchiveEntry["output_fileset"]
    mergedLogArchive.loadData()
    unmergedLogArchive.loadData()

    for logFileset in (mergedLogArchive, unmergedLogArchive):
        self.assertEqual(logFileset.name,
                         "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    blockFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    blockFileset.loadData()

    blockSubscription = Subscription(fileset=blockFileset,
                                     workflow=storeWorkflow)
    blockSubscription.loadData()

    self.assertEqual(blockSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(blockSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def testStoreResults(self):
    """
    _testStoreResults_

    Install a StoreResults workload into WMBS and confirm that the
    workflow's output map, its output filesets and the subscription on
    the top level fileset come out as expected.
    """
    def loadOutputFilesets(workflow, outputModule):
        # Fetch and load the (merged, unmerged) filesets of one output module.
        outputInfo = workflow.outputMap[outputModule][0]
        merged = outputInfo["merged_output_fileset"]
        unmerged = outputInfo["output_fileset"]
        merged.loadData()
        unmerged.loadData()
        return merged, unmerged

    testArguments = getTestArguments()
    testArguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

    resultsWorkload = storeResultsWorkload("TestWorkload", testArguments)
    resultsWorkload.setSpecUrl("somespec")
    resultsWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    wmbsHelper = WMBSHelper(resultsWorkload, "StoreResults", "SomeBlock")
    wmbsHelper.createTopLevelFileset()
    wmbsHelper.createSubscription(wmbsHelper.topLevelTask,
                                  wmbsHelper.topLevelFileset)

    resultsWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/StoreResults")
    resultsWorkflow.load()

    outputCount = len(resultsWorkflow.outputMap.keys())
    self.assertEqual(outputCount, 2, "Error: Wrong number of WF outputs.")

    for outputModule in ["Merged"]:
        mergedSet, unmergedSet = loadOutputFilesets(resultsWorkflow, outputModule)

        self.assertEqual(mergedSet.name,
                         "/TestWorkload/StoreResults/merged-%s" % outputModule,
                         "Error: Merged output fileset is wrong: %s" % mergedSet.name)
        self.assertEqual(unmergedSet.name,
                         "/TestWorkload/StoreResults/merged-%s" % outputModule,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedSet.name)

    mergedLogs, unmergedLogs = loadOutputFilesets(resultsWorkflow, "logArchive")

    self.assertEqual(mergedLogs.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogs.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    inputFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    inputFileset.loadData()

    mergeSub = Subscription(fileset=inputFileset, workflow=resultsWorkflow)
    mergeSub.loadData()

    self.assertEqual(mergeSub["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSub["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def testReReco(self):
    """
    _testReReco_

    Create a ReReco workload (a DataProcessing workload with one skim
    attached), install it into WMBS and check the skim, skim merge,
    cleanup, log collect and DQM harvest workflows, filesets and
    subscriptions.
    """
    skimConfigId = self.injectSkimConfig()
    recoConfigId = self.injectReRecoConfig()

    rerecoArgs = getTestArguments()
    rerecoArgs['ProcessingString'] = 'ProcString'
    rerecoArgs['ProcConfigCacheID'] = recoConfigId
    skimSpec = {"SkimName": "SomeSkim",
                "SkimInput": "RECOoutput",
                "SkimSplitAlgo": "FileBased",
                "SkimSplitArgs": {"files_per_job": 1,
                                  "include_parents": True},
                "ConfigCacheID": skimConfigId,
                "Scenario": None}
    rerecoArgs["SkimConfigs"] = [skimSpec]
    rerecoArgs["CouchURL"] = os.environ["COUCHURL"]
    rerecoArgs["CouchDBName"] = "rereco_t"

    workload = rerecoWorkload("TestWorkload", rerecoArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    # Walk the task tree down to the SkimB merge task and check its merged LFN base.
    recoMergeTask = workload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput
    skimBMergeTask = recoMergeTask.tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB
    self.assertEqual(skimBMergeTask.steps.cmsRun1.output.modules.Merged.mergedLFNBase,
                     '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

    helper = WMBSHelper(workload, "DataProcessing", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    # --- skim workflow outputs ---
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    skimModules = ["SkimA", "SkimB"]
    for moduleName in skimModules:
        moduleEntry = skimWorkflow.outputMap[moduleName][0]
        mergedFileset = moduleEntry["merged_output_fileset"]
        unmergedFileset = moduleEntry["output_fileset"]

        mergedFileset.loadData()
        unmergedFileset.loadData()

        self.assertEqual(mergedFileset.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % moduleName,
                         "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(unmergedFileset.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % moduleName,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedFileset.name)

    skimLogEntry = skimWorkflow.outputMap["logArchive"][0]
    skimMergedLogs = skimLogEntry["merged_output_fileset"]
    skimUnmergedLogs = skimLogEntry["output_fileset"]
    skimMergedLogs.loadData()
    skimUnmergedLogs.loadData()

    for logFileset in (skimMergedLogs, skimUnmergedLogs):
        self.assertEqual(logFileset.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    # --- skim merge workflow outputs ---
    for moduleName in skimModules:
        skimMergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % moduleName)
        skimMergeWorkflow.load()

        self.assertEqual(len(skimMergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedEntry = skimMergeWorkflow.outputMap["Merged"][0]
        mergedMergeFileset = mergedEntry["merged_output_fileset"]
        unmergedMergeFileset = mergedEntry["output_fileset"]

        mergedMergeFileset.loadData()
        unmergedMergeFileset.loadData()

        expectedMergedName = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % moduleName
        self.assertEqual(mergedMergeFileset.name, expectedMergedName,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeFileset.name, expectedMergedName,
                         "Error: Unmerged output fileset is wrong.")

        mergeLogEntry = skimMergeWorkflow.outputMap["logArchive"][0]
        mergeMergedLogs = mergeLogEntry["merged_output_fileset"]
        mergeUnmergedLogs = mergeLogEntry["output_fileset"]
        mergeMergedLogs.loadData()
        mergeUnmergedLogs.loadData()

        expectedLogName = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % moduleName
        self.assertEqual(mergeMergedLogs.name, expectedLogName,
                         "Error: LogArchive output fileset is wrong: %s" % mergeMergedLogs.name)
        self.assertEqual(mergeUnmergedLogs.name, expectedLogName,
                         "Error: LogArchive output fileset is wrong.")

    # --- skim subscription on the merged RECO output ---
    recoMergedFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
    recoMergedFileset.loadData()

    skimSub = Subscription(fileset=recoMergedFileset, workflow=skimWorkflow)
    skimSub.loadData()

    self.assertEqual(skimSub["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSub["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # --- merge subscriptions on the unmerged skim outputs ---
    skimLetters = ["A", "B"]
    for letter in skimLetters:
        unmergedSkim = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % letter)
        unmergedSkim.loadData()
        letterMergeWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % letter)
        letterMergeWorkflow.load()
        mergeSub = Subscription(fileset=unmergedSkim, workflow=letterMergeWorkflow)
        mergeSub.loadData()

        self.assertEqual(mergeSub["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSub["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # --- cleanup subscriptions on the unmerged skim outputs ---
    for letter in skimLetters:
        unmergedSkim = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % letter)
        unmergedSkim.loadData()
        letterCleanupWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % letter)
        letterCleanupWorkflow.load()
        cleanupSub = Subscription(fileset=unmergedSkim, workflow=letterCleanupWorkflow)
        cleanupSub.loadData()

        self.assertEqual(cleanupSub["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSub["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # --- log collect subscriptions on the skim merge log archives ---
    for letter in skimLetters:
        mergeLogFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % letter)
        mergeLogFileset.loadData()
        mergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                           task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (letter, letter))
        mergeLogCollectWorkflow.load()
        mergeLogSub = Subscription(fileset=mergeLogFileset, workflow=mergeLogCollectWorkflow)
        mergeLogSub.loadData()

        self.assertEqual(mergeLogSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeLogSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # --- DQM harvesting ---
    harvestWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged")
    harvestWorkflow.load()

    dqmMergedFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
    dqmMergedFileset.loadData()

    harvestSub = Subscription(fileset=dqmMergedFileset, workflow=harvestWorkflow)
    harvestSub.loadData()

    self.assertEqual(harvestSub["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(harvestSub["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    harvestLogEntry = harvestWorkflow.outputMap["logArchive"][0]
    harvestMergedLogs = harvestLogEntry["merged_output_fileset"]
    harvestUnmergedLogs = harvestLogEntry["output_fileset"]
    harvestMergedLogs.loadData()
    harvestUnmergedLogs.loadData()

    for logFileset in (harvestMergedLogs, harvestUnmergedLogs):
        self.assertEqual(logFileset.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    harvestLogFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged/unmerged-logArchive")
    harvestLogFileset.loadData()

    harvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged/DataProcessingMergeDQMoutputMergedDQMHarvestLogCollect")
    harvestLogCollectWorkflow.load()

    harvestLogSub = Subscription(fileset=harvestLogFileset, workflow=harvestLogCollectWorkflow)
    harvestLogSub.loadData()

    self.assertEqual(harvestLogSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(harvestLogSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Install the test workload into WMBS, truncate it at the merge task,
    install the truncated ("resubmit") workload as well and check the
    workflows, filesets and subscriptions the resubmission produced.
    """
    def loadAndCheckWorkflow(taskPath, expectedOutputs):
        # Load one resubmit workflow and check its owner, spec path and output count.
        workflow = Workflow(name="ResubmitTestWorkload", task=taskPath)
        workflow.load()
        self.assertEqual(workflow.owner, "sfoulkes", "Error: Wrong owner.")
        expectedSpec = os.path.join(self.workDir, workflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
        self.assertEqual(workflow.spec, expectedSpec, "Error: Wrong spec URL")
        self.assertEqual(len(workflow.outputMap.keys()), expectedOutputs,
                         "Error: Wrong number of WF outputs.")
        return workflow

    siteControl = ResourceControl()
    siteControl.insertSite(siteName='site1', seName='goodse.cern.ch',
                           ceName='site1', plugin="TestPlugin")
    siteControl.insertSite(siteName='site2', seName='goodse2.cern.ch',
                           ceName='site2', plugin="TestPlugin")

    workload = self.createTestWMSpec()
    firstHelper = WMBSHelper(workload, "SomeBlock", cachepath=self.workDir)
    firstHelper.createSubscription()

    workload.truncate("ResubmitTestWorkload",
                      "/TestWorkload/ProcessingTask/MergeTask",
                      "someserver", "somedatabase")
    resubmitHelper = WMBSHelper(workload, "SomeBlock2", cachepath=self.workDir)
    resubmitHelper.createSubscription()

    mergeWorkflow = loadAndCheckWorkflow("/ResubmitTestWorkload/MergeTask", 1)
    loadAndCheckWorkflow("/ResubmitTestWorkload/CleanupTask", 0)

    mergeOutputFileset = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    mergeOutputFileset.loadData()
    self.assertEqual(mergeOutputFileset.name,
                     "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = loadAndCheckWorkflow("/ResubmitTestWorkload/MergeTask/SkimTask", 2)

    skimEntryA = skimWorkflow.outputMap["SkimOutputA"][0]
    skimEntryB = skimWorkflow.outputMap["SkimOutputB"][0]
    mergedA = skimEntryA["merged_output_fileset"]
    unmergedA = skimEntryA["output_fileset"]
    mergedB = skimEntryB["merged_output_fileset"]
    unmergedB = skimEntryB["output_fileset"]

    for skimFileset in (mergedA, mergedB, unmergedA, unmergedB):
        skimFileset.loadData()

    self.assertEqual(mergedA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedA.name)
    self.assertEqual(unmergedA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    resubmitFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    resubmitFileset.loadData()

    mergeSub = Subscription(fileset=resubmitFileset, workflow=mergeWorkflow)
    mergeSub.loadData()

    self.assertEqual(len(mergeSub.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSub["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSub["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSub = Subscription(fileset=mergeOutputFileset, workflow=skimWorkflow)
    skimSub.loadData()

    self.assertEqual(skimSub["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSub["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testPrivateMC(self):
    """
    _testPrivateMC_

    Build a PrivateMC workload, install it into WMBS and check the
    processing workflow outputs, the top level subscription and the
    log collect subscription.
    """
    mcArguments = getTestArguments()
    mcArguments["CouchURL"] = os.environ["COUCHURL"]
    mcArguments["CouchDBName"] = "privatemc_t"
    mcArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    mcArguments["ProcessingVersion"] = 1

    workloadFactory = PrivateMCWorkloadFactory()
    workload = workloadFactory("TestWorkload", mcArguments)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(workload, "PrivateMC", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    mcWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/PrivateMC")
    mcWorkflow.load()
    self.assertEqual(len(mcWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs: %s" % len(mcWorkflow.outputMap.keys()))

    # The original author notes there is no merge task here; both map entries
    # are expected to name the same unmerged fileset.
    logEntry = mcWorkflow.outputMap["logArchive"][0]
    mergedLogs = logEntry["merged_output_fileset"]
    unmergedLogs = logEntry["output_fileset"]
    mergedLogs.loadData()
    unmergedLogs.loadData()

    for logFileset in (mergedLogs, unmergedLogs):
        self.assertEqual(logFileset.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    for moduleName in ["OutputA", "OutputB"]:
        moduleEntry = mcWorkflow.outputMap[moduleName][0]
        mergedFileset = moduleEntry["merged_output_fileset"]
        unmergedFileset = moduleEntry["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        self.assertEqual(mergedFileset.name,
                         "/TestWorkload/PrivateMC/unmerged-%s" % moduleName,
                         "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(unmergedFileset.name,
                         "/TestWorkload/PrivateMC/unmerged-%s" % moduleName,
                         "Error: Unmerged output fileset is wrong.")

    blockFileset = Fileset(name="TestWorkload-PrivateMC-SomeBlock")
    blockFileset.loadData()

    mcSubscription = Subscription(fileset=blockFileset, workflow=mcWorkflow)
    mcSubscription.loadData()

    self.assertEqual(mcSubscription["type"], "PrivateMC",
                     "Error: Wrong subscription type.")
    self.assertEqual(mcSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    logArchiveFileset = Fileset(name="/TestWorkload/PrivateMC/unmerged-logArchive")
    logArchiveFileset.loadData()
    logCollectWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/PrivateMC/LogCollect")
    logCollectWorkflow.load()

    logCollectSubscription = Subscription(fileset=logArchiveFileset,
                                          workflow=logCollectWorkflow)
    logCollectSubscription.loadData()

    self.assertEqual(logCollectSubscription["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"]) for lumi in dbsResult["LumiList"]: myRun.lumis.append(lumi["LumiSectionNumber"]) myFile.addRun(myRun) myFile.create() inputFileset.addFile(myFile) if len(inputFileset) < 1: raise Exception, "No files were selected!" inputFileset.commit() inputFileset.markOpen(False) return myThread = threading.currentThread() myThread.transaction.begin() for workloadTask in workload.taskIterator(): inputFileset = Fileset(name = workloadTask.getPathName()) inputFileset.create() inputDataset = workloadTask.inputDataset() inputDatasetPath = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier) injectFilesFromDBS(inputFileset, inputDatasetPath, options.RunWhitelist) myWMBSHelper = WMBSHelper(workload) myWMBSHelper.createSubscription(workloadTash.getPathName()) myThread.transaction.commit()
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Install the test workload into WMBS, truncate it at the merge task,
    create a subscription for every top level task of the truncated
    ("resubmit") workload and check the resulting workflows, filesets
    and subscriptions.
    """
    def checkWorkflow(taskPath, expectedOutputs):
        # Load one resubmit workflow and check its owner, spec path and output count.
        loaded = Workflow(name="ResubmitTestWorkload", task=taskPath)
        loaded.load()
        self.assertEqual(loaded.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(loaded.spec,
                         os.path.join(self.workDir, loaded.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(loaded.outputMap.keys()), expectedOutputs,
                         "Error: Wrong number of WF outputs.")
        return loaded

    sites = ResourceControl()
    sites.insertSite(siteName='site1', seName='goodse.cern.ch',
                     ceName='site1', plugin="TestPlugin")
    sites.insertSite(siteName='site2', seName='goodse2.cern.ch',
                     ceName='site2', plugin="TestPlugin")

    workload = self.createTestWMSpec()
    firstTask = getFirstTask(workload)
    initialHelper = WMBSHelper(workload, firstTask.name(), "SomeBlock",
                               cachepath=self.workDir)
    initialHelper.createTopLevelFileset()
    initialHelper.createSubscription(firstTask, initialHelper.topLevelFileset)

    workload.truncate("ResubmitTestWorkload",
                      "/TestWorkload/ProcessingTask/MergeTask",
                      "someserver", "somedatabase")

    # After truncation there are several top level tasks (MergeTask and
    # CleanupTask); each gets its own subscription for the same block.
    for topTask in workload.getTopLevelTask():
        resubmitHelper = WMBSHelper(workload, topTask.name(), "SomeBlock2",
                                    cachepath=self.workDir)
        resubmitHelper.createTopLevelFileset()
        resubmitHelper.createSubscription(topTask, resubmitHelper.topLevelFileset)

    mergeWorkflow = checkWorkflow("/ResubmitTestWorkload/MergeTask", 1)
    checkWorkflow("/ResubmitTestWorkload/CleanupTask", 0)

    mergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    mergeOutput.loadData()
    self.assertEqual(mergeOutput.name,
                     "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = checkWorkflow("/ResubmitTestWorkload/MergeTask/SkimTask", 2)

    entryA = skimWorkflow.outputMap["SkimOutputA"][0]
    entryB = skimWorkflow.outputMap["SkimOutputB"][0]
    mergedSkimA = entryA["merged_output_fileset"]
    unmergedSkimA = entryA["output_fileset"]
    mergedSkimB = entryB["merged_output_fileset"]
    unmergedSkimB = entryB["output_fileset"]

    mergedSkimA.loadData()
    mergedSkimB.loadData()
    unmergedSkimA.loadData()
    unmergedSkimB.loadData()

    expectedSkimA = "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA"
    expectedSkimB = "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB"
    self.assertEqual(mergedSkimA.name, expectedSkimA,
                     "Error: Merged output fileset is wrong: %s" % mergedSkimA.name)
    self.assertEqual(unmergedSkimA.name, expectedSkimA,
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimB.name, expectedSkimB,
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimB.name, expectedSkimB,
                     "Error: Unmerged output fileset is wrong.")

    mergeInputFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    mergeInputFileset.loadData()

    mergeSub = Subscription(fileset=mergeInputFileset, workflow=mergeWorkflow)
    mergeSub.loadData()

    self.assertEqual(len(mergeSub.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSub["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSub["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSub = Subscription(fileset=mergeOutput, workflow=skimWorkflow)
    skimSub.loadData()

    self.assertEqual(skimSub["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSub["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testDependentReDigi(self):
    """
    _testDependentReDigi_

    Verify that a dependent ReDigi workflow that keeps and stages out RAW
    data is created and installed into WMBS correctly.

    The workload is built from the three injected ReDigi configs.  The test
    then loads, by name, every fileset the three processing steps are
    expected to produce and checks, for each processing, merge, cleanup and
    log-collect task, that:

    * the workflow's output map contains the expected output modules,
    * each output module points at the expected merged/unmerged filesets,
    * each output module has exactly one destination, and
    * the subscription feeding the task has the expected type.
    """

    def loadFileset(name):
        # Build a Fileset with the given name and call loadData() on it.
        fileset = Fileset(name=name)
        fileset.loadData()
        return fileset

    def loadWorkflow(task):
        # Build the Workflow for one task of "TestWorkload" and load() it.
        workflow = Workflow(spec="somespec", name="TestWorkload", task=task)
        workflow.load()
        return workflow

    def checkOutputModules(workflow, outputMods, msg):
        # Fail with an assertion (not a KeyError further down) when an
        # expected output module is absent from the workflow's output map.
        for outputMod in outputMods:
            self.assertIn(outputMod, workflow.outputMap.keys(), msg)

    def checkOutputFilesets(workflow, outputMod, mergedFileset,
                            unmergedFileset, msg):
        # Compare the first destination of an output module against the
        # expected merged and unmerged filesets, by id.
        destination = workflow.outputMap[outputMod][0]
        self.assertEqual(destination["merged_output_fileset"].id,
                         mergedFileset.id, msg)
        self.assertEqual(destination["output_fileset"].id,
                         unmergedFileset.id, msg)

    def checkSingleDestination(workflow):
        # Every output module must have exactly one destination.
        for outputMod in workflow.outputMap.keys():
            self.assertEqual(len(workflow.outputMap[outputMod]), 1,
                             "Error: more than one destination for output mod.")

    def checkNoOutput(workflow, msg):
        # Cleanup and LogCollect workflows are expected to have an empty
        # output map.
        self.assertEqual(len(workflow.outputMap.keys()), 0, msg)

    def checkSubscriptionType(workflow, fileset, subType, msg):
        # Load the subscription tying the workflow to its input fileset and
        # check its type.
        subscription = Subscription(workflow=workflow, fileset=fileset)
        subscription.loadData()
        self.assertEqual(subscription["type"], subType, msg)

    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"

    configs = self.injectReDigiConfigs()
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]

    testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    # NOTE(review): testMonteCarlo and testSingleStepReDigi in this file pass
    # a task name and a cachepath to WMBSHelper and call
    # createTopLevelFileset() before createSubscription(...) -- confirm the
    # two-argument form used here is still supported.
    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
    testWMBSHelper.createSubscription()

    # Task paths.  Each dependent step hangs off the merge task of the
    # previous step's output, so the paths are built up from shared prefixes.
    stepOneTask = "/TestWorkload/StepOneProc"
    stepOneMergeTask = stepOneTask + "/StepOneProcMergeRAWDEBUGoutput"
    stepTwoTask = stepOneMergeTask + "/StepTwoProc"
    stepTwoMergeDQMTask = stepTwoTask + "/StepTwoProcMergeDQMoutput"
    stepTwoMergeRECOTask = stepTwoTask + "/StepTwoProcMergeRECODEBUGoutput"
    stepThreeTask = stepTwoMergeRECOTask + "/StepThreeProc"
    stepThreeMergeTask = stepThreeTask + "/StepThreeProcMergeaodOutputModule"

    # Failure messages shared by several checks below.
    logArchiveMsg = "Error: logArchive fileset is wrong."
    stepOneSubMsg = "Error: Step one sub has wrong type."
    stepTwoSubMsg = "Error: Step two sub has wrong type."
    stepThreeSubMsg = "Error: Step three sub has wrong type."

    # Load every fileset up front, in the same order as before.
    topLevelFileset = loadFileset("TestWorkload-StepOneProc-SomeBlock")

    stepOneUnmergedRAWFileset = loadFileset(stepOneTask + "/unmerged-RAWDEBUGoutput")
    stepOneMergedRAWFileset = loadFileset(stepOneMergeTask + "/merged-Merged")
    stepOneLogArchiveFileset = loadFileset(stepOneTask + "/unmerged-logArchive")
    stepOneMergeLogArchiveFileset = loadFileset(stepOneMergeTask + "/merged-logArchive")

    stepTwoUnmergedDQMFileset = loadFileset(stepTwoTask + "/unmerged-DQMoutput")
    stepTwoUnmergedRECOFileset = loadFileset(stepTwoTask + "/unmerged-RECODEBUGoutput")
    stepTwoMergedDQMFileset = loadFileset(stepTwoMergeDQMTask + "/merged-Merged")
    stepTwoMergedRECOFileset = loadFileset(stepTwoMergeRECOTask + "/merged-Merged")
    stepTwoLogArchiveFileset = loadFileset(stepTwoTask + "/unmerged-logArchive")
    stepTwoMergeDQMLogArchiveFileset = loadFileset(stepTwoMergeDQMTask + "/merged-logArchive")
    stepTwoMergeRECOLogArchiveFileset = loadFileset(stepTwoMergeRECOTask + "/merged-logArchive")

    stepThreeUnmergedAODFileset = loadFileset(stepThreeTask + "/unmerged-aodOutputModule")
    stepThreeMergedAODFileset = loadFileset(stepThreeMergeTask + "/merged-Merged")
    stepThreeLogArchiveFileset = loadFileset(stepThreeTask + "/unmerged-logArchive")
    stepThreeMergeLogArchiveFileset = loadFileset(stepThreeMergeTask + "/merged-logArchive")

    # ---- Step one: processing ----
    stepOneWorkflow = loadWorkflow(stepOneTask)
    checkOutputModules(stepOneWorkflow, ["logArchive", "RAWDEBUGoutput"],
                       "Error: Step one missing output module.")
    checkOutputFilesets(stepOneWorkflow, "logArchive",
                        stepOneLogArchiveFileset, stepOneLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepOneWorkflow, "RAWDEBUGoutput",
                        stepOneMergedRAWFileset, stepOneUnmergedRAWFileset,
                        "Error: RAWDEBUG output fileset is wrong.")
    checkSingleDestination(stepOneWorkflow)
    checkSubscriptionType(stepOneWorkflow, topLevelFileset,
                          "Processing", stepOneSubMsg)

    # ---- Step one: cleanup ----
    stepOneCleanupWorkflow = loadWorkflow(
        stepOneTask + "/StepOneProcCleanupUnmergedRAWDEBUGoutput")
    checkNoOutput(stepOneCleanupWorkflow,
                  "Error: Cleanup should have no output.")
    checkSubscriptionType(stepOneCleanupWorkflow, stepOneUnmergedRAWFileset,
                          "Cleanup", stepOneSubMsg)

    # ---- Step one: log collect ----
    stepOneLogCollectWorkflow = loadWorkflow(stepOneTask + "/LogCollect")
    checkNoOutput(stepOneLogCollectWorkflow,
                  "Error: LogCollect should have no output.")
    checkSubscriptionType(stepOneLogCollectWorkflow, stepOneLogArchiveFileset,
                          "LogCollect", stepOneSubMsg)

    # ---- Step one: merge ----
    stepOneMergeWorkflow = loadWorkflow(stepOneMergeTask)
    checkOutputModules(stepOneMergeWorkflow, ["Merged", "logArchive"],
                       "Error: Step one merge missing output module.")
    checkOutputFilesets(stepOneMergeWorkflow, "logArchive",
                        stepOneMergeLogArchiveFileset,
                        stepOneMergeLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepOneMergeWorkflow, "Merged",
                        stepOneMergedRAWFileset, stepOneMergedRAWFileset,
                        "Error: RAWDEBUG merge output fileset is wrong.")
    checkSingleDestination(stepOneMergeWorkflow)
    checkSubscriptionType(stepOneMergeWorkflow, stepOneUnmergedRAWFileset,
                          "Merge", stepOneSubMsg)

    # ---- Step two: processing (reads step one's merged RAW) ----
    stepTwoWorkflow = loadWorkflow(stepTwoTask)
    # "logArchive" is indexed below, so check for it here as the other
    # steps do.
    checkOutputModules(stepTwoWorkflow,
                       ["RECODEBUGoutput", "DQMoutput", "logArchive"],
                       "Error: Step two missing output module.")
    checkOutputFilesets(stepTwoWorkflow, "logArchive",
                        stepTwoLogArchiveFileset, stepTwoLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepTwoWorkflow, "RECODEBUGoutput",
                        stepTwoMergedRECOFileset, stepTwoUnmergedRECOFileset,
                        "Error: RECODEBUG output fileset is wrong.")
    checkOutputFilesets(stepTwoWorkflow, "DQMoutput",
                        stepTwoMergedDQMFileset, stepTwoUnmergedDQMFileset,
                        "Error: DQM output fileset is wrong.")
    checkSubscriptionType(stepTwoWorkflow, stepOneMergedRAWFileset,
                          "Processing", stepTwoSubMsg)
    checkSingleDestination(stepTwoWorkflow)

    # ---- Step two: cleanup (DQM and RECO) ----
    stepTwoCleanupDQMWorkflow = loadWorkflow(
        stepTwoTask + "/StepTwoProcCleanupUnmergedDQMoutput")
    checkNoOutput(stepTwoCleanupDQMWorkflow,
                  "Error: Cleanup shouldn't have any output.")
    checkSubscriptionType(stepTwoCleanupDQMWorkflow, stepTwoUnmergedDQMFileset,
                          "Cleanup", stepTwoSubMsg)

    stepTwoCleanupRECOWorkflow = loadWorkflow(
        stepTwoTask + "/StepTwoProcCleanupUnmergedRECODEBUGoutput")
    checkNoOutput(stepTwoCleanupRECOWorkflow,
                  "Error: Cleanup shouldn't have any output.")
    checkSubscriptionType(stepTwoCleanupRECOWorkflow,
                          stepTwoUnmergedRECOFileset,
                          "Cleanup", stepTwoSubMsg)

    # ---- Step two: log collect ----
    stepTwoLogCollectWorkflow = loadWorkflow(
        stepTwoTask + "/StepTwoProcLogCollect")
    checkNoOutput(stepTwoLogCollectWorkflow,
                  "Error: LogCollect shouldn't have any output.")
    checkSubscriptionType(stepTwoLogCollectWorkflow, stepTwoLogArchiveFileset,
                          "LogCollect", stepTwoSubMsg)

    # ---- Step two: RECO merge ----
    stepTwoMergeRECOWorkflow = loadWorkflow(stepTwoMergeRECOTask)
    checkOutputModules(stepTwoMergeRECOWorkflow, ["Merged", "logArchive"],
                       "Error: Step two merge missing output module.")
    checkOutputFilesets(stepTwoMergeRECOWorkflow, "logArchive",
                        stepTwoMergeRECOLogArchiveFileset,
                        stepTwoMergeRECOLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepTwoMergeRECOWorkflow, "Merged",
                        stepTwoMergedRECOFileset, stepTwoMergedRECOFileset,
                        "Error: RECODEBUG merge output fileset is wrong.")
    checkSubscriptionType(stepTwoMergeRECOWorkflow, stepTwoUnmergedRECOFileset,
                          "Merge", stepTwoSubMsg)
    checkSingleDestination(stepTwoMergeRECOWorkflow)

    # ---- Step two: DQM merge ----
    stepTwoMergeDQMWorkflow = loadWorkflow(stepTwoMergeDQMTask)
    checkOutputModules(stepTwoMergeDQMWorkflow, ["Merged", "logArchive"],
                       "Error: Step two merge missing output module.")
    checkOutputFilesets(stepTwoMergeDQMWorkflow, "logArchive",
                        stepTwoMergeDQMLogArchiveFileset,
                        stepTwoMergeDQMLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepTwoMergeDQMWorkflow, "Merged",
                        stepTwoMergedDQMFileset, stepTwoMergedDQMFileset,
                        "Error: DQM merge output fileset is wrong.")
    checkSubscriptionType(stepTwoMergeDQMWorkflow, stepTwoUnmergedDQMFileset,
                          "Merge", stepTwoSubMsg)
    checkSingleDestination(stepTwoMergeDQMWorkflow)

    # ---- Step three: processing (reads step two's merged RECO) ----
    stepThreeWorkflow = loadWorkflow(stepThreeTask)
    checkOutputModules(stepThreeWorkflow, ["aodOutputModule", "logArchive"],
                       "Error: Step three missing output module.")
    checkOutputFilesets(stepThreeWorkflow, "logArchive",
                        stepThreeLogArchiveFileset, stepThreeLogArchiveFileset,
                        logArchiveMsg)
    # The message used to say "RECODEBUG" here, copied from step two.
    checkOutputFilesets(stepThreeWorkflow, "aodOutputModule",
                        stepThreeMergedAODFileset, stepThreeUnmergedAODFileset,
                        "Error: AOD output fileset is wrong.")
    checkSubscriptionType(stepThreeWorkflow, stepTwoMergedRECOFileset,
                          "Processing", stepThreeSubMsg)
    checkSingleDestination(stepThreeWorkflow)

    # ---- Step three: cleanup ----
    stepThreeCleanupWorkflow = loadWorkflow(
        stepThreeTask + "/StepThreeProcCleanupUnmergedaodOutputModule")
    checkNoOutput(stepThreeCleanupWorkflow,
                  "Error: Cleanup should have no output.")
    checkSubscriptionType(stepThreeCleanupWorkflow,
                          stepThreeUnmergedAODFileset,
                          "Cleanup", stepThreeSubMsg)

    # ---- Step three: log collect ----
    stepThreeLogCollectWorkflow = loadWorkflow(
        stepThreeTask + "/StepThreeProcLogCollect")
    checkNoOutput(stepThreeLogCollectWorkflow,
                  "Error: LogCollect should have no output.")
    checkSubscriptionType(stepThreeLogCollectWorkflow,
                          stepThreeLogArchiveFileset,
                          "LogCollect", stepThreeSubMsg)

    # ---- Step three: merge ----
    stepThreeMergeWorkflow = loadWorkflow(stepThreeMergeTask)
    checkOutputModules(stepThreeMergeWorkflow, ["Merged", "logArchive"],
                       "Error: Step three merge missing output module.")
    checkOutputFilesets(stepThreeMergeWorkflow, "logArchive",
                        stepThreeMergeLogArchiveFileset,
                        stepThreeMergeLogArchiveFileset,
                        logArchiveMsg)
    checkOutputFilesets(stepThreeMergeWorkflow, "Merged",
                        stepThreeMergedAODFileset, stepThreeMergedAODFileset,
                        "Error: AOD merge output fileset is wrong.")
    checkSubscriptionType(stepThreeMergeWorkflow, stepThreeUnmergedAODFileset,
                          "Merge", stepThreeSubMsg)
    checkSingleDestination(stepThreeMergeWorkflow)
def testPromptRecoWithSkims(self): """ _testT1PromptRecoWithSkim_ Create a T1 Prompt Reconstruction workflow with PromptSkims and verify it installs into WMBS correctly. """ self.setupPromptSkimConfigObject() testArguments = getTestArguments() testArguments["PromptSkims"] = [self.promptSkim] testArguments["CouchURL"] = os.environ["COUCHURL"] testArguments["CouchDBName"] = "promptreco_t" testArguments["EnvPath"] = os.environ.get("EnvPath", None) testArguments["BinPath"] = os.environ.get("BinPath", None) testWorkload = promptrecoWorkload("TestWorkload", testArguments) testWorkload.setSpecUrl("somespec") testWorkload.setOwnerDetails("*****@*****.**", "T0") testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock") testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco") recoWorkflow.load() self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1, "Error: Wrong number of WF outputs in the Reco WF.") goldenOutputMods = [ "write_RECO", "write_ALCARECO", "write_AOD", "write_DQM" ] for goldenOutputMod in goldenOutputMods: mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][ "merged_output_fileset"] unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() if goldenOutputMod != "write_ALCARECO": self.assertEqual( mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = recoWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() 
unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim") alcaSkimWorkflow.load() self.assertEqual( len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1, "Error: Wrong number of WF outputs in the AlcaSkim WF.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][ "merged_output_fileset"] unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") promptSkimWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1") promptSkimWorkflow.load() self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6, "Error: 
Wrong number of WF outputs.") goldenOutputMods = [ "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5" ] for goldenOutputMod in goldenOutputMods: mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0][ "merged_output_fileset"] unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, 
"Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual( len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs %d." 
% len(mergeWorkflow.outputMap.keys())) mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") goldenOutputMods = [ "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5" ] for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual( len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs %d." 
% len(mergeWorkflow.outputMap.keys())) mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock") topLevelFileset.loadData() recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow) recoSubscription.loadData() self.assertEqual(recoSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual( recoSubscription["split_algo"], "EventBased", "Error: Wrong split algorithm. 
%s" % recoSubscription["split_algo"]) alcaRecoFileset = Fileset( name="/TestWorkload/Reco/unmerged-write_ALCARECO") alcaRecoFileset.loadData() alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow) alcaSkimSubscription.loadData() self.assertEqual(alcaSkimSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual( alcaSkimSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"]) mergedRecoFileset = Fileset( name="/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged") mergedRecoFileset.loadData() promptSkimSubscription = Subscription(fileset=mergedRecoFileset, workflow=promptSkimWorkflow) promptSkimSubscription.loadData() self.assertEqual(promptSkimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual( promptSkimSubscription["split_algo"], "FileBased", "Error: Wrong split algorithm. %s" % promptSkimSubscription["split_algo"]) unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"] for unmergedOutput in unmergedOutputs: unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput) unmergedDataTier.loadData() dataTierMergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput) dataTierMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual( mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) unmergedOutputs = [] for alcaProd in testArguments["AlcaSkims"]: unmergedOutputs.append("ALCARECOStream%s" % alcaProd) for unmergedOutput in unmergedOutputs: unmergedAlcaSkim = Fileset( name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput) unmergedAlcaSkim.loadData() alcaSkimMergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput) alcaSkimMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual( mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"]) unmergedOutputs = [ "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5" ] for unmergedOutput in unmergedOutputs: unmergedPromptSkim = Fileset( name= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput) unmergedPromptSkim.loadData() promptSkimMergeWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % unmergedOutput) promptSkimMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedPromptSkim, workflow=promptSkimMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual( mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) goldenOutputMods = [ "write_RECO", "write_AOD", "write_DQM", "write_ALCARECO" ] for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset( name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") goldenOutputMods = [ "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5" ] for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset( name= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput) unmergedFileset.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s" % unmergedOutput) cleanupWorkflow.load() cleanupSubscription = 
Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual( cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algorithm. %s" % cleanupSubscription["split_algo"]) recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive") recoLogCollect.loadData() recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect") recoLogCollectWorkflow.load() logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") alcaSkimLogCollect = Fileset( name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive") alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect") alcaSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") promptSkimLogCollect = Fileset( name= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive" ) promptSkimLogCollect.loadData() promptSkimLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect" ) promptSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset=promptSkimLogCollect, workflow=promptSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], 
"MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: recoMergeLogCollect = Fileset( name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod) recoMergeLogCollect.loadData() recoMergeLogCollectWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) recoMergeLogCollectWorkflow.load() logCollectSubscription = Subscription( fileset=recoMergeLogCollect, workflow=recoMergeLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: alcaSkimLogCollect = Fileset( name= "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod) alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) alcaSkimLogCollectWorkflow.load() logCollectSubscription = Subscription( fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = [ "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5" ] for goldenOutputMod in goldenOutputMods: promptSkimMergeLogCollect = Fileset( name= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod) promptSkimMergeLogCollect.loadData() 
promptSkimMergeLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) promptSkimMergeLogCollectWorkflow.load() logCollectSubscription = Subscription( fileset=promptSkimMergeLogCollect, workflow=promptSkimMergeLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") return
} factory = TaskChainWorkloadFactory() try: self.workload = factory("YankingTheChain", arguments) except Exception, ex: msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex) self.fail(msg) self.workload.setSpecUrl("somespec") self.workload.setOwnerDetails("*****@*****.**", "DMWM") testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath = self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1']) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"), arguments['Task2']) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"), arguments['Task3']) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"), arguments['Task4']) digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT") digiStep = digi.getStepHelper("cmsRun1") self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag']) self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion']) self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])
def testPromptReco(self):
    """
    _testPromptReco_

    Create a Prompt Reconstruction workflow and verify it installs into
    WMBS correctly.

    The test injects the workload through WMBSHelper and then checks, in
    order:
      * the output filesets of the Reco, AlcaSkim and DQM harvesting
        workflows,
      * the output filesets of every Reco and AlcaSkim merge workflow,
      * the type and split algorithm of the processing, harvesting,
        merge, cleanup and LogCollect subscriptions.
    """
    testArguments = getTestArguments()

    testWorkload = promptrecoWorkload("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    # Output modules that get their own merge task, and the AlcaSkim
    # streams; built once and reused by every section below.
    mergedTiers = ["write_RECO", "write_AOD", "write_DQM"]
    alcaStreams = ["ALCARECOStream%s" % alcaProd
                   for alcaProd in testArguments["AlcaSkims"]]

    # ---- Reco workflow outputs ----
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()),
                     len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD",
                        "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
            "merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
            "output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        # Only the merged fileset name is skipped for write_ALCARECO;
        # the unmerged name is checked for every output module.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # ---- AlcaSkim workflow outputs ----
    alcaSkimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    for goldenOutputMod in alcaStreams:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
            "merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
            "output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # ---- DQM harvesting workflow outputs ----
    dqmWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged")
    dqmWorkflow.load()

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")

    # ---- Reco merge workflow outputs ----
    for goldenOutputMod in mergedTiers:
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # ---- AlcaSkim merge workflow outputs ----
    for goldenOutputMod in alcaStreams:
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(
            len(mergeWorkflow.outputMap.keys()), 2,
            "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # ---- Top level, AlcaSkim and DQM harvesting subscriptions ----
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()

    recoSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=recoWorkflow)
    recoSubscription.loadData()

    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(
        recoSubscription["split_algo"], "EventBased",
        "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    alcaRecoFileset = Fileset(
        name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()

    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                        workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()

    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(
        alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
        "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    mergedDQMFileset = Fileset(
        name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
    mergedDQMFileset.loadData()

    dqmSubscription = Subscription(fileset=mergedDQMFileset,
                                   workflow=dqmWorkflow)
    dqmSubscription.loadData()

    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # ---- Merge subscriptions ----
    for unmergedOutput in mergedTiers:
        unmergedDataTier = Fileset(
            name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier,
                                         workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "WMBSMergeBySize",
            "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    for unmergedOutput in alcaStreams:
        unmergedAlcaSkim = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                         workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "WMBSMergeBySize",
            "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # ---- Cleanup subscriptions ----
    for goldenOutputMod in mergedTiers + ["write_ALCARECO"]:
        unmergedFileset = Fileset(
            name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    for goldenOutputMod in alcaStreams:
        unmergedFileset = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # ---- LogCollect subscriptions of the processing tasks ----
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect,
                                 workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    alcaSkimLogCollect = Fileset(
        name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                 workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # ---- LogCollect subscriptions of the merge tasks ----
    for goldenOutputMod in mergedTiers:
        recoMergeLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(
            fileset=recoMergeLogCollect,
            workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()

        # Assert on the subscription loaded in this iteration, not on
        # the one left over from the previous section.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"],
                         "MinFileBased",
                         "Error: Wrong split algorithm.")

    for goldenOutputMod in alcaStreams:
        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(
            fileset=alcaSkimLogCollect,
            workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()

        # Same as above: check the freshly loaded subscription.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"],
                         "MinFileBased",
                         "Error: Wrong split algorithm.")

    # ---- LogCollect subscription of the DQM harvesting task ----
    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/RecoMergewrite_DQMMergedDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()

    logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                 workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    return
def setupPromptRecoWorkflow(self):
    """
    _setupPromptRecoWorkflow_

    Populate WMBS with a real PromptReco workflow,
    every subscription must be unfinished at first.

    Side effects: sets self.orderedStates and self.stateMap (state name
    -> list of subscriptions appended below) and registers the request
    in WMStats with status 'Closed'.
    """
    workflowName = 'PromptReco_Run195360_Cosmics'

    # Build the workload and inject it into WMBS; the helper is handed
    # self.testDir as its cache path.
    workload = promptrecoWorkload(workflowName, getTestArguments())
    wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock',
                            cachepath=self.testDir)
    wmbsHelper.createTopLevelFileset()
    wmbsHelper.createSubscription(wmbsHelper.topLevelTask,
                                  wmbsHelper.topLevelFileset)

    self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting',
                          'Processing Done']
    # One independent, empty list per state.
    self.stateMap = dict((state, []) for state in self.orderedStates)

    # Register the request in WMStats.
    self.wmstatsWriter.insertGenericRequest({'_id': workflowName})
    self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

    recoTask = '/%s/Reco' % workflowName
    skimTask = '%s/AlcaSkim' % recoTask
    harvestTask = ('%s/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged'
                   % recoTask)
    mergeTaskTemplates = [
        '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
        '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
        '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
        '%s/RecoMergewrite_AOD',
        '%s/RecoMergewrite_DQM',
        '%s/RecoMergewrite_RECO',
    ]

    # The top level (Reco) subscription is filed under 'AlcaSkim'.
    self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

    # The AlcaSkim subscription is filed under 'Merge'.
    skimWorkflow = Workflow(name=workflowName, task=skimTask)
    skimWorkflow.load()
    skimInput = Fileset(name='%s/unmerged-write_ALCARECO' % recoTask)
    skimInput.load()
    skimSubscription = Subscription(skimInput, skimWorkflow)
    skimSubscription.load()
    self.stateMap['Merge'].append(skimSubscription)

    # Every merge subscription is filed under 'Harvesting'.
    skimMergePrefix = 'AlcaSkimMerge'
    for template in mergeTaskTemplates:
        taskPath = template % recoTask
        workflow = Workflow(name=workflowName, task=taskPath)
        workflow.load()

        taskName = taskPath.split('/')[-1]
        if 'AlcaSkim' in taskPath:
            # Drop the merge prefix to recover the stream name.
            filesetName = '%s/unmerged-%s' % (
                skimTask, taskName[len(skimMergePrefix):])
        else:
            # The data tier is whatever follows the last underscore.
            filesetName = '%s/unmerged-write_%s' % (
                recoTask, taskName.split('_')[-1])
        inputFileset = Fileset(name=filesetName)
        inputFileset.load()

        subscription = Subscription(inputFileset, workflow)
        subscription.load()
        self.stateMap['Harvesting'].append(subscription)

    # The harvesting subscription is filed under 'Processing Done'.
    harvestWorkflow = Workflow(name=workflowName, task=harvestTask)
    harvestWorkflow.load()
    harvestInput = Fileset(
        name='%s/RecoMergewrite_DQM/merged-Merged' % recoTask)
    harvestInput.load()
    harvestSubscription = Subscription(harvestInput, harvestWorkflow)
    harvestSubscription.load()
    self.stateMap['Processing Done'].append(harvestSubscription)

    return
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Create a MonteCarloFromGEN workflow and verify it installs into WMBS
    correctly.

    Checks the output filesets of the processing and merge workflows,
    then the type and split algorithm of the production, merge, cleanup
    and LogCollect subscriptions.
    """
    arguments = getTestArguments()
    arguments["ConfigCacheID"] = self.injectConfig()
    arguments["CouchDBName"] = "mclhe_t"

    testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN",
                                "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    taskRoot = "/TestWorkload/MonteCarloFromGEN"
    outputModules = ["outputRECORECO", "outputALCARECOALCARECO"]

    # ---- Processing workflow outputs ----
    procWorkflow = Workflow(name="TestWorkload", task=taskRoot)
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'lheproduction')

    for module in outputModules:
        outputEntry = procWorkflow.outputMap[module][0]
        mergedFileset = outputEntry["merged_output_fileset"]
        unmergedFileset = outputEntry["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        self.assertEqual(
            mergedFileset.name,
            "%s/MonteCarloFromGENMerge%s/merged-Merged" % (taskRoot, module),
            "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(
            unmergedFileset.name,
            "%s/unmerged-%s" % (taskRoot, module),
            "Error: Unmerged output fileset is wrong.")

    logEntry = procWorkflow.outputMap["logArchive"][0]
    mergedLogs = logEntry["merged_output_fileset"]
    unmergedLogs = logEntry["output_fileset"]
    mergedLogs.loadData()
    unmergedLogs.loadData()

    procLogName = "%s/unmerged-logArchive" % taskRoot
    self.assertEqual(mergedLogs.name, procLogName,
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogs.name, procLogName,
                     "Error: LogArchive output fileset is wrong.")

    # ---- Merge workflow outputs ----
    for module in outputModules:
        mergeTask = "%s/MonteCarloFromGENMerge%s" % (taskRoot, module)
        mergeWorkflow = Workflow(name="TestWorkload", task=mergeTask)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedEntry = mergeWorkflow.outputMap["Merged"][0]
        mergedFileset = mergedEntry["merged_output_fileset"]
        unmergedFileset = mergedEntry["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        mergedName = "%s/merged-Merged" % mergeTask
        self.assertEqual(mergedFileset.name, mergedName,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedFileset.name, mergedName,
                         "Error: Unmerged output fileset is wrong.")

        logEntry = mergeWorkflow.outputMap["logArchive"][0]
        mergedLogs = logEntry["merged_output_fileset"]
        unmergedLogs = logEntry["output_fileset"]
        mergedLogs.loadData()
        unmergedLogs.loadData()

        mergeLogName = "%s/merged-logArchive" % mergeTask
        self.assertEqual(
            mergedLogs.name, mergeLogName,
            "Error: LogArchive output fileset is wrong: %s" % mergedLogs.name)
        self.assertEqual(unmergedLogs.name, mergeLogName,
                         "Error: LogArchive output fileset is wrong.")

    # ---- Production subscription ----
    topLevelFileset = Fileset(
        name="TestWorkload-MonteCarloFromGEN-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=procWorkflow)
    procSubscription.loadData()

    self.assertEqual(
        procSubscription["type"], "Production",
        "Error: Wrong subscription type: %s" % procSubscription["type"])
    self.assertEqual(procSubscription["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    # ---- Merge subscriptions (RECO first, then ALCARECO) ----
    for module in outputModules:
        unmergedInput = Fileset(
            name="%s/unmerged-%s" % (taskRoot, module))
        unmergedInput.loadData()
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="%s/MonteCarloFromGENMerge%s" % (taskRoot, module))
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedInput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # ---- Cleanup subscriptions ----
    for module in outputModules:
        unmergedInput = Fileset(
            name="%s/unmerged-%s" % (taskRoot, module))
        unmergedInput.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="%s/MonteCarloFromGENCleanupUnmerged%s" % (taskRoot, module))
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedInput,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # ---- LogCollect subscriptions ----
    # (input fileset, LogCollect task): the processing task first, then
    # one entry per merge task, in output module order.
    logCollectTargets = [("%s/unmerged-logArchive" % taskRoot,
                          "%s/LogCollect" % taskRoot)]
    for module in outputModules:
        logCollectTargets.append(
            ("%s/MonteCarloFromGENMerge%s/merged-logArchive" % (
                taskRoot, module),
             "%s/MonteCarloFromGENMerge%s/MonteCarloFromGEN%sMergeLogCollect" % (
                 taskRoot, module, module)))

    for filesetName, taskPath in logCollectTargets:
        logFileset = Fileset(name=filesetName)
        logFileset.loadData()
        logWorkflow = Workflow(name="TestWorkload", task=taskPath)
        logWorkflow.load()
        logCollectSub = Subscription(fileset=logFileset,
                                     workflow=logWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return