def testStoreResults(self):
    """
    _testStoreResults_

    Build a StoreResults workload and check that it installs into WMBS
    correctly: workflow outputs, log-archive filesets and the top-level
    merge subscription.
    """
    arguments = StoreResultsWorkloadFactory.getTestArguments()
    arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})
    factory = StoreResultsWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    testWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults")
    testWorkflow.load()

    # One data output module plus the logArchive output.
    self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    for outputModule in ["Merged"]:
        filesets = testWorkflow.outputMap[outputModule][0]
        mergedOutput = filesets["merged_output_fileset"]
        unmergedOutput = filesets["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # StoreResults is a merge-type task, so both filesets live under
        # the merged- area.
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % outputModule,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % outputModule,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

    logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def testD_Exhausted(self):
    """
    _testExhausted_

    Test that the system can exhaust jobs correctly
    """
    workloadName = 'TestWorkload'
    self.createWorkload(workloadName=workloadName)
    workloadPath = os.path.join(self.testDir, 'workloadTest', workloadName,
                                'WMSandbox', 'WMWorkload.pkl')

    testJobGroup = self.createTestJobGroup(nJobs=self.nJobs, retry_count=5,
                                           workloadPath=workloadPath)

    config = self.getConfig()
    config.ErrorHandler.maxRetries = 1
    changer = ChangeState(config)

    # Walk every job through the state machine up to its failure.
    for newState, oldState in (('created', 'new'),
                               ('executing', 'created'),
                               ('complete', 'executing'),
                               ('jobfailed', 'complete')):
        changer.propagate(testJobGroup.jobs, newState, oldState)

    testSubscription = Subscription(id=1)  # You should only have one
    testSubscription.load()
    testSubscription.loadData()

    # Do we have files to start with?
    self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 2)

    testErrorHandler = ErrorHandlerPoller(config)
    # set reqAuxDB None for the test,
    testErrorHandler.reqAuxDB = None
    testErrorHandler.setup(None)
    testErrorHandler.algorithm(None)

    # Retries are exceeded, so nothing should remain failed or cooling off;
    # every job must have been moved to Exhausted.
    self.assertEqual(len(self.getJobs.execute(state='JobFailed')), 0)
    self.assertEqual(len(self.getJobs.execute(state='JobCooloff')), 0)
    self.assertEqual(len(self.getJobs.execute(state='Exhausted')), self.nJobs)

    # Did we fail the files?
    self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
    self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 2)
def testD_Exhausted(self):
    """
    _testExhausted_

    Test that the system can exhaust jobs correctly
    """
    workloadName = "TestWorkload"
    # Return value intentionally discarded: we only need the workload
    # pickle to exist on disk for createTestJobGroup below.
    self.createWorkload(workloadName=workloadName)
    # Reuse workloadName instead of repeating the literal, so the path
    # stays consistent if the name ever changes.
    workloadPath = os.path.join(self.testDir, "workloadTest", workloadName,
                                "WMSandbox", "WMWorkload.pkl")

    testJobGroup = self.createTestJobGroup(nJobs=self.nJobs, retry_count=5,
                                           workloadPath=workloadPath)

    config = self.getConfig()
    config.ErrorHandler.maxRetries = 1
    changer = ChangeState(config)
    # Drive the jobs through the full lifecycle to a failure.
    changer.propagate(testJobGroup.jobs, "created", "new")
    changer.propagate(testJobGroup.jobs, "executing", "created")
    changer.propagate(testJobGroup.jobs, "complete", "executing")
    changer.propagate(testJobGroup.jobs, "jobfailed", "complete")

    testSubscription = Subscription(id=1)  # You should only have one
    testSubscription.load()
    testSubscription.loadData()

    # Do we have files to start with?
    self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 2)

    testErrorHandler = ErrorHandlerPoller(config)
    testErrorHandler.setup(None)
    testErrorHandler.algorithm(None)

    # Retries exceeded: no jobs should remain failed or cooling off,
    # all of them must be Exhausted.
    idList = self.getJobs.execute(state="JobFailed")
    self.assertEqual(len(idList), 0)

    idList = self.getJobs.execute(state="JobCooloff")
    self.assertEqual(len(idList), 0)

    idList = self.getJobs.execute(state="Exhausted")
    self.assertEqual(len(idList), self.nJobs)

    # Did we fail the files?
    self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
    self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 2)
def _commonMonteCarloTest(self):
    """
    Retrieve the workload from WMBS and test all its properties.
    """
    prodWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production")
    prodWorkflow.load()
    self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["OutputA", "OutputB"]

    # Production task outputs: merged filesets under the merge tasks,
    # unmerged filesets under the production task itself.
    for outputMod in goldenOutputMods:
        filesets = prodWorkflow.outputMap[outputMod][0]
        mergedOutput = filesets["merged_output_fileset"]
        unmergedOutput = filesets["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % outputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Production/unmerged-%s" % outputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Each merge workflow carries a Merged output plus its logArchive.
    for outputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % outputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % outputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % outputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level production subscription.
    topLevelFileset = Fileset(name="TestWorkload-Production-SomeBlock")
    topLevelFileset.loadData()
    prodSubscription = Subscription(fileset=topLevelFileset, workflow=prodWorkflow)
    prodSubscription.loadData()
    self.assertEqual(prodSubscription["type"], "Production",
                     "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions feed off the unmerged output filesets.
    for outputName in ["OutputA", "OutputB"]:
        unmergedOutput = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
        unmergedOutput.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % outputName)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions also run over the unmerged filesets.
    for outputName in ["OutputA", "OutputB"]:
        unmerged = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # Log collect for the production task itself...
    procLogCollect = Fileset(name="/TestWorkload/Production/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Production/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # ...and one per merge task.
    for outputName in ["OutputA", "OutputB"]:
        mergeLogCollect = Fileset(
            name="/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName)
        mergeLogCollect.loadData()
        mergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (outputName, outputName))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # Spot-check the merged LFN base of the skim-B merge output.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.
                     tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.
                     Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing")
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap), 3, "Error: Wrong number of WF outputs.")

    # Output modules mapped to their data tiers; fileset keys are module+tier.
    goldenOutputMods = {"RECOoutput": "RECO", "DQMoutput": "DQM"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = procWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2, "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    unmergedReco = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    unmergedDqm = Fileset(name="/TestWorkload/DataProcessing/unmerged-DQMoutputDQM")
    unmergedDqm.loadData()
    dqmMergeWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
    dqmMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedDqm, workflow=dqmMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    procLogCollect = Fileset(name="/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # BUG FIX: the original verified the RECOoutput merge log-collect
    # subscription twice (copy/paste) and never checked the DQMoutput one.
    # Check each merge task's log collect exactly once.
    for goldenOutputMod in goldenOutputMods:
        mergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod)
        mergeLogCollect.loadData()
        mergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingMerge%s/DataProcessing%sMergeLogCollect" % (
                goldenOutputMod, goldenOutputMod))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap), 3, "Error: Wrong number of WF outputs.")

    goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2, "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-MergedRECO")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.")

    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    for skimOutput in goldenOutputMods:
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (
                skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # DQM harvesting runs over the merged DQM fileset.
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()

    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-MergedDQM")
    topLevelFileset.loadData()
    dqmSubscription = Subscription(fileset=topLevelFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    return
def _checkTask(self, task, taskConf, centralConf):
    """
    _checkTask_

    Verify the correctness of the task: input step wiring, pileup
    configuration, WMBS workflow outputs / merge filesets, the output
    module dataset information and the input subscription.
    """
    if "InputTask" in taskConf:
        # A child task must read the cmsRun1 step of its parent task.
        inpTaskPath = task.getPathName()
        inpTaskPath = inpTaskPath.replace(task.name(), "")
        inpTaskPath += "cmsRun1"
        self.assertEqual(task.data.input.inputStep, inpTaskPath,
                         "Input step is wrong in the spec")
        self.assertTrue(taskConf["InputTask"] in inpTaskPath,
                        "Input task is not in the path name for child task")

    if "MCPileup" in taskConf or "DataPileup" in taskConf:
        mcDataset = taskConf.get('MCPileup', None)
        dataDataset = taskConf.get('DataPileup', None)
        if mcDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset, [mcDataset])
        if dataDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset, [dataDataset])

    workflow = Workflow(name=self.workload.name(), task=task.getPathName())
    workflow.load()

    outputMods = outputModuleList(task)
    self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                     "Error: Wrong number of WF outputs")

    for outputModule in outputMods:
        filesets = workflow.outputMap[outputModule][0]
        merged = filesets['merged_output_fileset']
        unmerged = filesets['output_fileset']
        merged.loadData()
        unmerged.loadData()
        mergedset = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-Merged"
        # Output that is not kept/merged stays in the unmerged area.
        if outputModule == "logArchive" or not taskConf.get("KeepOutput", True) \
                or outputModule in taskConf.get("TransientOutputModules", []) \
                or outputModule in centralConf.get("IgnoredOutputModules", []):
            mergedset = task.getPathName() + "/unmerged-" + outputModule
        unmergedset = task.getPathName() + "/unmerged-" + outputModule
        self.assertEqual(mergedset, merged.name, "Merged fileset name is wrong")
        self.assertEqual(unmergedset, unmerged.name, "Unmerged fileset name is wrong")

        if outputModule != "logArchive" and taskConf.get("KeepOutput", True) \
                and outputModule not in taskConf.get("TransientOutputModules", []) \
                and outputModule not in centralConf.get("IgnoredOutputModules", []):
            # Kept output gets a dedicated merge task; verify its filesets.
            mergeTask = task.getPathName() + "/" + task.name() + "Merge" + outputModule
            mergeWorkflow = Workflow(name=self.workload.name(), task=mergeTask)
            mergeWorkflow.load()
            self.assertTrue("Merged" in mergeWorkflow.outputMap,
                            "Merge workflow does not contain a Merged output key")
            mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
            mergedFileset = mergedOutputMod['merged_output_fileset']
            unmergedFileset = mergedOutputMod['output_fileset']
            mergedFileset.loadData()
            unmergedFileset.loadData()
            self.assertEqual(mergedFileset.name, mergedset,
                             "Merged fileset name in merge task is wrong")
            self.assertEqual(unmergedFileset.name, mergedset,
                             "Unmerged fileset name in merge task is wrong")

            mrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['merged_output_fileset']
            umrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['output_fileset']
            mrgLogArch.loadData()
            umrgLogArch.loadData()
            archName = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-logArchive"
            self.assertEqual(mrgLogArch.name, archName,
                             "LogArchive merged fileset name is wrong in merge task")
            self.assertEqual(umrgLogArch.name, archName,
                             "LogArchive unmerged fileset name is wrong in merge task")

        if outputModule != "logArchive":
            taskOutputMods = task.getOutputModulesForStep(stepName="cmsRun1")
            currentModule = getattr(taskOutputMods, outputModule)
            if "PrimaryDataset" in taskConf:
                self.assertEqual(currentModule.primaryDataset,
                                 taskConf["PrimaryDataset"], "Wrong primary dataset")
            # BUG FIX: this was a single string
            # ["AcquisitionEra, ProcessingString, ProcessingVersion"], which
            # never matched a taskConf key, so the checks below were dead
            # code (which also hid the self.asserTrue typo and a Python 2
            # `print` statement, both fixed here).
            processedDatasetParts = ["AcquisitionEra", "ProcessingString", "ProcessingVersion"]
            allParts = True
            for part in processedDatasetParts:
                if part in taskConf:
                    # Each configured part must appear in the processed
                    # dataset name (str() because ProcessingVersion is int).
                    self.assertTrue(str(taskConf[part]) in currentModule.processedDataset,
                                    "Wrong processed dataset for module")
                else:
                    allParts = False
            if allParts:
                # BUG FIX: the original assertEqual was missing the actual
                # value and compared the expected string against the error
                # message.
                self.assertEqual("%s-%s-v%s" % (taskConf["AcquisitionEra"],
                                                taskConf["ProcessingString"],
                                                taskConf["ProcessingVersion"]),
                                 currentModule.processedDataset,
                                 "Wrong processed dataset for module")

    # Test subscriptions
    if "InputTask" not in taskConf:
        # Top-level task: fed by the block-level fileset.
        inputFileset = "%s-%s-SomeBlock" % (self.workload.name(), task.name())
    elif "Merge" in task.getPathName().split("/")[-2]:
        # Child of a merge task: reads the merged fileset.
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "merged-Merged"
    else:
        # Child of a processing task: reads the unmerged output module.
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"]
    taskFileset = Fileset(name=inputFileset)
    taskFileset.loadData()

    taskSubscription = Subscription(fileset=taskFileset, workflow=workflow)
    taskSubscription.loadData()

    if "InputTask" not in taskConf and "InputDataset" not in taskConf:
        # Production type
        self.assertEqual(taskSubscription["type"], "Production",
                         "Error: Wrong subscription type for processing task")
        self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgorithm"],
                         "Error: Wrong split algo for generation task")
    else:
        # Processing type
        self.assertEqual(taskSubscription["type"], "Processing",
                         "Wrong subscription type for task")
        if taskSubscription["split_algo"] != "WMBSMergeBySize":
            self.assertEqual(taskSubscription["split_algo"],
                             taskConf['SplittingAlgorithm'], "Splitting algo mismatch")
        else:
            # WMBSMergeBySize is only legal on an unmerged fileset.
            self.assertEqual(taskFileset.name,
                             inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"],
                             "Subscription uses WMBSMergeBySize on a merge fileset")
    return
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Create a MonteCarloFromGEN workflow and verify it installs into WMBS
    correctly: output datasets, workflow output maps, filesets and the
    subscriptions (production, merge, cleanup, log collect).
    """
    # Build the workload from the factory's test arguments plus a config
    # cache document injected into the test couch instance.
    arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
    arguments["ConfigCacheID"] = self.injectConfig()
    arguments["CouchDBName"] = "mclhe_t"
    arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

    factory = MonteCarloFromGENWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    # The workload should expose exactly the two merged output datasets.
    outputDatasets = testWorkload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 2)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in outputDatasets)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in outputDatasets)

    # Deterministic pileup must be off by default for this spec.
    productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN')
    splitting = productionTask.jobSplittingParameters()
    self.assertFalse(splitting["deterministicPileup"])

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock", cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Top-level production workflow: two data outputs plus logArchive.
    procWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/MonteCarloFromGEN")
    procWorkflow.load()
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'production')

    # Each output module maps to a merged fileset (under the merge task)
    # and an unmerged fileset (under the production task).
    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    # logArchive of the production task is never merged; both map entries
    # point at the same unmerged fileset.
    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Merge workflows: a "Merged" output plus their own logArchive, and the
    # merge task's logArchive *is* merged (lives under merged-logArchive).
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level subscription: Production type with event-aware lumi splitting.
    topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Production",
                     "Error: Wrong subscription type: %s" % procSubscription["type"])
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    # Merge subscription for the RECO output.
    unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Merge subscription for the ALCARECO output.
    unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
    unmergedAlca.loadData()
    alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
    alcaMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions watch the unmerged filesets of both outputs.
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscription for the production task's logs.
    procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription for the RECO merge task's logs.
    procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription for the ALCARECO merge task's logs.
    procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Verify that the subscription creation code works correctly: workflows
    for every task are registered in WMBS with the right owner/group/spec,
    output filesets are wired up, and each subscription has the expected
    type, splitting algorithm and site white/black list.
    """
    # Two sites so the proc subscription gets a non-trivial white/black list.
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                               ceName = 'site1', plugin = "TestPlugin")
    resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                               ceName = 'site2', plugin = "TestPlugin")

    testWorkload = self.createTestWMSpec()
    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
    testWMBSHelper.createSubscription()

    # Processing workflow: owner/group/type/spec and a single output module.
    procWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/ProcessingTask")
    procWorkflow.load()
    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.")
    self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name,
                                                     "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # OutputA maps to a merged fileset under MergeTask and an unmerged one
    # under the processing task.
    mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
    unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()
    self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # Merge workflow: one output module.
    mergeWorkflow = Workflow(name = "TestWorkload",
                             task = "/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name,
                                                      "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # Cleanup workflow: produces no outputs.
    cleanupWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()
    self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name,
                                                        "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    # The merge task's output fileset is the merged-Merged fileset.
    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # Skim workflow: two output modules.
    skimWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name,
                                                     "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # Skim outputs have no merge task, so merged and unmerged entries both
    # point at the unmerged fileset.
    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()
    self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # Processing subscription: site1 white listed, site2 black listed.
    topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()
    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for site in procSubscription.getWhiteBlackList():
        if site["site_name"] == "site1":
            self.assertEqual(site["valid"], 1, "Error: Site should be white listed.")
        else:
            self.assertEqual(site["valid"], 0, "Error: Site should be black listed.")
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscription: no site list, WMBSMergeBySize splitting.
    mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    # Skim subscription feeds off the merged output.
    skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def _stepTwoTaskTest(self):
    """
    _stepTwoTaskTest_

    Verify the WMBS installation of the StepTwo task in the task chain:
    its workflow output map, the per-output merge workflows, and the
    merge / cleanup / log-collect subscriptions.
    """
    # StepTwo workflow: two data outputs (OutputC, OutputD) plus logArchive.
    alcaRecoTask = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo",
    )
    alcaRecoTask.load()
    self.assertEqual(len(alcaRecoTask.outputMap.keys()), 3, "Error: Wrong number of WF outputs.")
    # output modules
    goldenOutputMods = ["OutputC", "OutputD"]
    for o in goldenOutputMods:
        # Each output module maps to a merged fileset (under its merge task)
        # and an unmerged fileset (under StepTwo itself).
        mergedOutput = alcaRecoTask.outputMap[o][0]["merged_output_fileset"]
        unmergedOutput = alcaRecoTask.outputMap[o][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged" % o,
            ("Error: Merged output fileset is wrong: %s" % mergedOutput.name),
        )
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s" % o,
            ("Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name),
        )
    # StepTwo's logArchive is never merged; both entries are the unmerged fileset.
    logArchOutput = alcaRecoTask.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaRecoTask.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-logArchive",
        ("Error: LogArchive output fileset is wrong: %s" % logArchOutput.name),
    )
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-logArchive",
        ("Error: LogArchive output fileset is wrong: %s" % unmergedLogArchOutput.name),
    )
    # Merge workflows: "Merged" output plus a merged logArchive.
    for o in goldenOutputMods:
        mergeTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s" % o,
        )
        mergeTask.load()
        self.assertEqual(len(mergeTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeTask.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeTask.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged" % o,
            ("Error: Merged output fileset is wrong: %s" % mergedMergeOutput.name),
        )
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged" % o,
            ("Error: Unmerged output fileset is wrong: %s" % unmergedMergeOutput.name),
        )
        logArchOutput = mergeTask.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeTask.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive" % o,
            ("Error: LogArchive output fileset is wrong: %s" % logArchOutput.name),
        )
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive" % o,
            ("Error: LogArchive output fileset is wrong: %s" % unmergedLogArchOutput.name),
        )
    # Merge subscriptions on the unmerged output filesets.
    for o in goldenOutputMods:
        unmerged = Fileset(
            name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s" % o
        )
        unmerged.loadData()
        mergeTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s" % o,
        )
        mergeTask.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeTask)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize", "Error: Wrong split algo.")
    # Cleanup subscriptions on the same unmerged filesets.
    for o in goldenOutputMods:
        unmerged = Fileset(
            name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s" % o
        )
        unmerged.loadData()
        cleanupTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoCleanupUnmerged%s" % o,
        )
        cleanupTask.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupTask)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")
    # LogCollect subscriptions on the merge tasks' merged logArchive filesets;
    # note these use EndOfRun splitting, not MinFileBased.
    for o in goldenOutputMods:
        alcaRecoMergeLogCollect = Fileset(
            name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive" % o
        )
        alcaRecoMergeLogCollect.loadData()
        alcaRecoMergeLogCollectTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/StepTwo%sMergeLogCollect" % (o, o),
        )
        alcaRecoMergeLogCollectTask.load()
        logCollectSub = Subscription(fileset=alcaRecoMergeLogCollect, workflow=alcaRecoMergeLogCollectTask)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")
def testPromptReco(self):
    """
    _testPromptReco_

    Create a Prompt Reconstruction workflow and verify it installs into
    WMBS correctly: Reco/AlcaSkim/DQM-harvest workflows, their output
    filesets, and the processing, merge, harvest, cleanup and log-collect
    subscriptions.

    Fixes: the Reco-merge and AlcaSkim-merge LogCollect loops previously
    asserted on the stale ``logCollectSub`` variable from an earlier
    section instead of the ``logCollectSubscription`` they had just
    loaded, so those loops verified nothing.
    """
    testArguments = getTestArguments()

    testWorkload = promptrecoWorkload("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Reco workflow: one output per write tier plus logArchive.
    recoWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # write_ALCARECO feeds the AlcaSkim task instead of a merge task, so
        # it has no RecoMerge* merged fileset of its own.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # Reco logArchive is never merged; both entries are the unmerged fileset.
    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim workflow: one output per ALCA skim plus logArchive.
    alcaSkimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # DQM harvesting workflow: only emits a logArchive.
    dqmWorkflow = Workflow(name = "TestWorkload",
                           task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged")
    dqmWorkflow.load()
    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Reco merge workflows: Merged output plus a merged logArchive.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # AlcaSkim merge workflows, one per ALCA skim.
    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level Reco subscription: Processing with EventBased splitting.
    topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # AlcaSkim subscription feeds off the unmerged ALCARECO output.
    alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # DQM harvesting subscription feeds off the merged DQM output.
    mergedDQMFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset = mergedDQMFileset, workflow = dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # Merge subscriptions for the Reco data tiers.
    unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
    for unmergedOutput in unmergedOutputs:
        unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name = "TestWorkload",
                                         task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions for the ALCA skim outputs.
    unmergedOutputs = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                         task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions for every unmerged Reco output.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        # Message fixed: this assertion checks the split algorithm.
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # Cleanup subscriptions for every unmerged ALCA skim output.
    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        # Message fixed: this assertion checks the split algorithm.
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscription for the Reco task's logs.
    recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscription for the AlcaSkim task's logs.
    alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the Reco merge tasks.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Bug fix: assert on the subscription loaded in this loop iteration,
        # not on the stale logCollectSub from the sections above.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the AlcaSkim merge tasks.
    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                              task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Bug fix: same stale-variable problem as the Reco merge loop above.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscription for the DQM harvesting task's logs.
    dqmHarvestLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                            task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/RecoMergewrite_DQMMergedDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testExpress(self):
    """
    _testExpress_

    Create an Express workflow and verify it installs into WMBS
    correctly: top-level output filesets, AlcaSkim and DQM-harvest
    child workflows, merge/cleanup/log-collect subscriptions and
    their splitting algorithms.
    """
    testArguments = ExpressWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))
    testArguments['RecoCMSSWVersion'] = "CMSSW_9_0_0"

    factory = ExpressWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Express", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Top-level Express workflow: one output fileset per configured
    # output plus the logArchive fileset.
    expressWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Express")
    expressWorkflow.load()
    self.assertEqual(len(expressWorkflow.outputMap.keys()), len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Express WF.")

    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_ALCARECO": "ALCARECO",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = expressWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = expressWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        # The ALCARECO output feeds the AlcaSkim task instead of a
        # plain merge, so it has no ExpressMerge merged fileset.
        if goldenOutputMod != "write_StreamExpress_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/Express/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = expressWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = expressWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Express/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Express/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim child workflow: one output per alca skim plus logArchive.
    alcaSkimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = {"ALCARECOStreamPromptCalibProd": "ALCAPROMPT",
                        "ALCARECOStreamTkAlMinBias": "ALCARECO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # AlcaSkim outputs are not merged further; both filesets point
        # at the unmerged location.
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # DQM harvesting workflows (end-of-run and periodic).
    # NOTE: dqmWorkflow intentionally leaks out of this loop; the
    # harvesting-subscription check below runs against the last
    # (periodic) harvest workflow, as in the original test.
    for dqmString in ["ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                      "ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged"]:
        dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s" % dqmString
        dqmWorkflow = Workflow(name="TestWorkload", task=dqmTask)
        dqmWorkflow.load()
        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "%s/unmerged-logArchive" % dqmTask,
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "%s/unmerged-logArchive" % dqmTask,
                         "Error: LogArchive output fileset is wrong.")

    # ExpressMerge workflows: a Merged<tier> output plus logArchive.
    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Express/ExpressMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level Express subscription.
    topLevelFileset = Fileset(name="TestWorkload-Express")
    topLevelFileset.loadData()
    expressSubscription = Subscription(fileset=topLevelFileset, workflow=expressWorkflow)
    expressSubscription.loadData()
    self.assertEqual(expressSubscription["type"], "Express",
                     "Error: Wrong subscription type.")
    self.assertEqual(expressSubscription["split_algo"], "Express",
                     "Error: Wrong split algorithm. %s" % expressSubscription["split_algo"])

    # AlcaSkim subscription on the unmerged ALCARECO fileset.
    alcaRecoFileset = Fileset(name="/TestWorkload/Express/unmerged-write_StreamExpress_ALCARECOALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Express",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "ExpressMerge",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # Harvesting subscription on the merged DQMIO fileset.
    mergedDQMFileset = Fileset(
        name="/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/merged-MergedDQMIO")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # Merge subscriptions on the unmerged data-tier filesets.
    unmergedOutputs = {"write_PrimaryDataset1_FEVT": "FEVT",
                       "write_StreamExpress_DQMIO": "DQMIO"}
    for unmergedOutput, tier in unmergedOutputs.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Express/ExpressMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ExpressMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on every unmerged output fileset.
    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_ALCARECO": "ALCARECO",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Express/ExpressCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # LogCollect subscription for the top-level Express task.
    expressLogCollect = Fileset(name="/TestWorkload/Express/unmerged-logArchive")
    expressLogCollect.loadData()
    expressLogCollectWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Express/ExpressLogCollect")
    expressLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=expressLogCollect, workflow=expressLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscription for the AlcaSkim task.
    alcaSkimLogCollect = Fileset(
        name="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the ExpressMerge tasks.
    goldenOutputMods = ["write_PrimaryDataset1_FEVT", "write_StreamExpress_DQMIO"]
    for goldenOutputMod in goldenOutputMods:
        expressMergeLogCollect = Fileset(
            name="/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod)
        expressMergeLogCollect.loadData()
        expressMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                  task="/TestWorkload/Express/ExpressMerge%s/Express%sMergeLogCollect" % (
                                                      goldenOutputMod, goldenOutputMod))
        expressMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=expressMergeLogCollect,
                                              workflow=expressMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # BUG FIX: the original asserted on the stale `logCollectSub`
        # from the paragraph above, so the per-merge LogCollect
        # subscriptions were never actually validated.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the DQM harvesting tasks.
    for dqmStrings in [("ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                        "ExpressMergewrite_StreamExpress_DQMIOMergedEndOfRunDQMHarvestLogCollect"),
                       ("ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged",
                        "ExpressMergewrite_StreamExpress_DQMIOMergedPeriodicDQMHarvestLogCollect")]:
        dqmFileset = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/unmerged-logArchive" % \
                     dqmStrings[0]
        dqmHarvestLogCollect = Fileset(name=dqmFileset)
        dqmHarvestLogCollect.loadData()
        dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/%s" % dqmStrings
        dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload", task=dqmTask)
        dqmHarvestLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                     workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
    return
def testPromptReco(self):
    """
    _testPromptReco_

    Create a Prompt Reconstruction workflow and verify it installs
    into WMBS correctly: Reco/AlcaSkim/DQM-harvest workflows, their
    output filesets, and the merge/cleanup/log-collect subscriptions
    with the expected splitting algorithms.
    """
    testArguments = PromptRecoWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["EnableHarvesting"] = True

    factory = PromptRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Top-level Reco workflow: one output fileset per write tier plus
    # the logArchive fileset.
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # ALCARECO feeds the AlcaSkim task, so it has no RecoMerge
        # merged fileset of its own.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim child workflow: one output per alca skim plus logArchive.
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # DQM harvesting workflow (end-of-run).
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # RecoMerge workflows: a Merged output plus logArchive.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # AlcaSkimMerge workflows: a Merged output plus logArchive.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level Reco subscription.
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # AlcaSkim subscription on the unmerged ALCARECO fileset.
    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # Harvesting subscription on the merged DQM fileset.
    mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # Merge subscriptions on the unmerged data-tier filesets.
    unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
    for unmergedOutput in unmergedOutputs:
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions for the alca skim outputs.
    unmergedOutputs = []
    for alcaProd in testArguments["AlcaSkims"]:
        unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the Reco unmerged filesets.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # Cleanup subscriptions on the AlcaSkim unmerged filesets.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # LogCollect subscription for the top-level Reco task.
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscription for the AlcaSkim task.
    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the RecoMerge tasks.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                                                   goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                              workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # BUG FIX: the original asserted on the stale `logCollectSub`
        # from the paragraph above, so these per-merge subscriptions
        # were never actually validated.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the AlcaSkimMerge tasks.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                                                  goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                              workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        # BUG FIX: same stale-variable problem as above.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscription for the DQM harvesting task.
    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                 workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testExpress(self):
    """
    _testExpress_

    Create an Express workflow and verify that all of its workflows,
    output filesets and subscriptions are installed into WMBS correctly.
    """
    testArguments = ExpressWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))
    testArguments['RecoCMSSWVersion'] = "CMSSW_9_0_0"

    factory = ExpressWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Express", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Top-level Express workflow: one output fileset per configured output
    # plus the logArchive fileset.
    expressWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Express")
    expressWorkflow.load()
    self.assertEqual(len(expressWorkflow.outputMap.keys()),
                     len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Express WF.")

    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_ALCARECO": "ALCARECO",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = expressWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = expressWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # The ALCARECO output feeds the AlcaSkim task instead of a regular
        # merge task, so it has no ExpressMerge* merged fileset to check.
        if goldenOutputMod != "write_StreamExpress_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Express/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = expressWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = expressWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Express/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Express/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim workflow: one output per configured alca skim plus logArchive.
    alcaSkimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = {"ALCARECOStreamPromptCalibProd": "ALCAPROMPT",
                        "ALCARECOStreamTkAlMinBias": "ALCARECO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # Alca skim outputs are not merged: both filesets point at the
        # unmerged location.
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # DQM harvesting workflows only produce a logArchive fileset.
    for dqmString in ["ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                      "ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged"]:
        dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s" % dqmString
        dqmWorkflow = Workflow(name="TestWorkload", task=dqmTask)
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "%s/unmerged-logArchive" % dqmTask,
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "%s/unmerged-logArchive" % dqmTask,
                         "Error: LogArchive output fileset is wrong.")

    # Merge workflows for the outputs that do get merged.
    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Express/ExpressMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Now verify the subscriptions, starting from the top-level input fileset.
    topLevelFileset = Fileset(name="TestWorkload-Express")
    topLevelFileset.loadData()
    expressSubscription = Subscription(fileset=topLevelFileset, workflow=expressWorkflow)
    expressSubscription.loadData()
    self.assertEqual(expressSubscription["type"], "Express",
                     "Error: Wrong subscription type.")
    self.assertEqual(expressSubscription["split_algo"], "Express",
                     "Error: Wrong split algorithm. %s" % expressSubscription["split_algo"])

    alcaRecoFileset = Fileset(name="/TestWorkload/Express/unmerged-write_StreamExpress_ALCARECOALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Express",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "ExpressMerge",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # NOTE: dqmWorkflow intentionally refers to the last workflow loaded in
    # the DQM harvest loop above (the Periodic harvest task).
    mergedDQMFileset = Fileset(name="/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/merged-MergedDQMIO")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    unmergedOutputs = {"write_PrimaryDataset1_FEVT": "FEVT",
                       "write_StreamExpress_DQMIO": "DQMIO"}
    for unmergedOutput, tier in unmergedOutputs.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Express/ExpressMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ExpressMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_ALCARECO": "ALCARECO",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Express/ExpressCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    expressLogCollect = Fileset(name="/TestWorkload/Express/unmerged-logArchive")
    expressLogCollect.loadData()
    expressLogCollectWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Express/ExpressLogCollect")
    expressLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=expressLogCollect, workflow=expressLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    alcaSkimLogCollect = Fileset(name="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    goldenOutputMods = ["write_PrimaryDataset1_FEVT", "write_StreamExpress_DQMIO"]
    for goldenOutputMod in goldenOutputMods:
        expressMergeLogCollect = Fileset(name="/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod)
        expressMergeLogCollect.loadData()
        expressMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                  task="/TestWorkload/Express/ExpressMerge%s/Express%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        expressMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=expressMergeLogCollect,
                                              workflow=expressMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Bug fix: assert on the subscription loaded in this iteration.
        # Previously these checks inspected the stale logCollectSub object
        # created for the AlcaSkim log collect above, so this loop verified
        # nothing.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    for dqmStrings in [("ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                        "ExpressMergewrite_StreamExpress_DQMIOMergedEndOfRunDQMHarvestLogCollect"),
                       ("ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged",
                        "ExpressMergewrite_StreamExpress_DQMIOMergedPeriodicDQMHarvestLogCollect")]:
        dqmFileset = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/unmerged-logArchive" % \
            dqmStrings[0]
        dqmHarvestLogCollect = Fileset(name=dqmFileset)
        dqmHarvestLogCollect.loadData()
        dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/%s" % dqmStrings
        dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload", task=dqmTask)
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                     workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on.  This tests run on unmerged RECO output.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()

    # Build a ReReco workload with one skim running on transient RECO output.
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = recoConfig

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    mergeStep = testWorkload.data.tasks.DataProcessing.tree.children. \
        SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1
    self.assertEqual(mergeStep.output.modules.Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # The skim workflow should expose both skim outputs plus logArchive.
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Each skim output gets its own merge workflow.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim runs directly off the unmerged (transient) RECO output.
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (
                skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def _commonMonteCarloTest(self):
    """
    Retrieve the Monte Carlo workload from WMBS and test all its
    properties: output filesets, merge/cleanup/log-collect workflows
    and the subscriptions tying them together.
    """
    # Top-level production workflow: two outputs plus logArchive.
    prodWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production")
    prodWorkflow.load()
    self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["OutputA", "OutputB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Production/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # One merge workflow per production output.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Production subscription on the top-level fileset.
    topLevelFileset = Fileset(name="TestWorkload-Production-SomeBlock")
    topLevelFileset.loadData()
    prodSubscription = Subscription(fileset=topLevelFileset, workflow=prodWorkflow)
    prodSubscription.loadData()
    self.assertEqual(prodSubscription["type"], "Production",
                     "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions on the unmerged outputs.
    for outputName in ["OutputA", "OutputB"]:
        unmergedOutput = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
        unmergedOutput.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % outputName)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the unmerged outputs.
    for outputName in ["OutputA", "OutputB"]:
        unmerged = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # Log collect for the production task itself...
    procLogCollect = Fileset(name="/TestWorkload/Production/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Production/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # ... and one per merge task.
    for outputName in ["OutputA", "OutputB"]:
        mergeLogCollect = Fileset(name="/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName)
        mergeLogCollect.loadData()
        mergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (
                outputName, outputName))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Truncate a workload at its merge task, resubmit it, and verify that
    the resubmitted (Resubmit*) workflows and subscriptions are created
    correctly in WMBS.
    """
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2', pnn='goodse2.cern.ch',
                               ceName='site2', plugin="TestPlugin")

    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock",
                                cachepath=self.workDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask,
                                              testWMBSHelper.topLevelFileset)

    testWorkload.truncate("ResubmitTestWorkload",
                          "/TestWorkload/ProcessingTask/MergeTask",
                          "someserver", "somedatabase")

    # create the subscription for multiple top task (MergeTask and CleanupTask for the same block)
    for task in testWorkload.getTopLevelTask():
        testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2",
                                            cachepath=self.workDir)
        testResubmitWMBSHelper.createTopLevelFileset()
        testResubmitWMBSHelper._createSubscriptionsInWMBS(
            task, testResubmitWMBSHelper.topLevelFileset)

    # The truncated merge task becomes the new top-level workflow.
    mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                             task="/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec,
                     os.path.join(self.workDir, mergeWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = Workflow(name="ResubmitTestWorkload",
                            task="/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec,
                     os.path.join(self.workDir, skimWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()

    # Skim outputs are unmerged in the resubmitted workload: merged and
    # unmerged filesets both point at the unmerged location.
    self.assertEqual(mergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()
    mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Verify that the subscription creation code works correctly: all task
    workflows, their output filesets and the processing/merge/skim
    subscriptions (including site white/black lists) are created in WMBS.
    """
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2', pnn='goodse2.cern.ch',
                               ceName='site2', plugin="TestPlugin")

    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock",
                                cachepath=self.workDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Processing workflow metadata and output map.
    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask")
    procWorkflow.load()
    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco",
                     "Error: Wrong type.")
    self.assertEqual(procWorkflow.spec,
                     os.path.join(self.workDir, procWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
    unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()
    self.assertEqual(mergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # Merge workflow.
    mergeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec,
                     os.path.join(self.workDir, mergeWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # Cleanup workflow has no outputs.
    cleanupWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()
    self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec,
                     os.path.join(self.workDir, cleanupWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # Skim workflow with two outputs.
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec,
                     os.path.join(self.workDir, skimWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()

    # Skim outputs are not merged; merged and unmerged filesets coincide.
    self.assertEqual(mergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # Processing subscription: site1 white-listed, site2 black-listed.
    topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for site in procSubscription.getWhiteBlackList():
        if site["site_name"] == "site1":
            self.assertEqual(site["valid"], 1,
                             "Error: Site should be white listed.")
        else:
            self.assertEqual(site["valid"], 0,
                             "Error: Site should be black listed.")
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    return
class JobGroup(WMBSBase, WMJobGroup):
    """
    A group (set) of Jobs, persisted in WMBS.

    Extends the in-memory WMJobGroup with database persistence: the group
    row, its output fileset and its member jobs are stored through DAO
    classes in the "JobGroup"/"Jobs"/"Masks" namespaces.
    """

    def __init__(self, subscription=None, jobs=None, id=-1, uid=None,
                 location=None):
        WMBSBase.__init__(self)
        WMJobGroup.__init__(self, subscription=subscription, jobs=jobs)

        # id == -1 means "not yet created in the database"
        self.id = id
        self.lastUpdate = None
        self.uid = uid

        # Use identity comparison with None (PEP 8) instead of `!= None`
        if location is not None:
            self.setSite(location)
        return

    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        existingTransaction = self.beginTransaction()

        # overwrite base class self.output for WMBS fileset
        self.output = Fileset(name=makeUUID())
        self.output.create()

        if self.uid is None:
            self.uid = makeUUID()

        action = self.daofactory(classname="JobGroup.New")
        action.execute(self.uid, self.subscription["id"],
                       self.output.id, conn=self.getDBConn(),
                       transaction=self.existingTransaction())

        # The insert does not hand back the row id; look it up explicitly.
        self.id = self.exists()
        self.commitTransaction(existingTransaction)
        return

    def delete(self):
        """
        Remove a jobgroup from WMBS
        """
        deleteAction = self.daofactory(classname="JobGroup.Delete")
        deleteAction.execute(id=self.id, conn=self.getDBConn(),
                             transaction=self.existingTransaction())
        return

    def exists(self):
        """
        Does a jobgroup exist with id; if id is not provided, use the uid.
        Returns the database id.
        """
        if self.id != -1:
            action = self.daofactory(classname="JobGroup.ExistsByID")
            result = action.execute(id=self.id, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        else:
            action = self.daofactory(classname="JobGroup.Exists")
            result = action.execute(uid=self.uid, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        return result

    def load(self):
        """
        _load_

        Load all meta data associated with the JobGroup.  This includes the
        JobGroup id, uid, last_update time, subscription id and output
        fileset id.  Either the JobGroup id or uid must be specified for
        this to work.
        """
        existingTransaction = self.beginTransaction()

        if self.id > 0:
            loadAction = self.daofactory(classname="JobGroup.LoadFromID")
            result = loadAction.execute(self.id, conn=self.getDBConn(),
                                        transaction=self.existingTransaction())
        else:
            loadAction = self.daofactory(classname="JobGroup.LoadFromUID")
            result = loadAction.execute(self.uid, conn=self.getDBConn(),
                                        transaction=self.existingTransaction())

        self.id = result["id"]
        self.uid = result["uid"]
        self.lastUpdate = result["last_update"]

        # Subscription/output are only loaded shallowly here; loadData()
        # pulls in their full contents.
        self.subscription = Subscription(id=result["subscription"])
        self.subscription.load()

        self.output = Fileset(id=result["output"])
        self.output.load()

        self.jobs = []
        self.commitTransaction(existingTransaction)
        return

    def loadData(self):
        """
        _loadData_

        Load all data that is associated with the jobgroup.  This includes
        loading all the subscription information, the output fileset
        information and all the jobs that are associated with the group.
        """
        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid is None:
            self.load()

        self.subscription.loadData()
        self.output.loadData()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        self.jobs = []
        self.newjobs = []

        for jobID in result:
            newJob = Job(id=jobID["id"])
            newJob.loadData()
            self.add(newJob)

        # Move freshly-added jobs from newjobs into the committed set.
        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def commit(self):
        """
        _commit_

        Write any new jobs to the database, creating them in the database
        if necessary.
        """
        existingTransaction = self.beginTransaction()

        if self.id == -1:
            self.create()

        for j in self.newjobs:
            j.create(group=self)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def setSite(self, site_name=None):
        """
        Updates the jobGroup with a site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.SetSite")
        result = action.execute(site_name=site_name, jobGroupID=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())
        return result

    def getSite(self):
        """
        Retrieves the jobGroup's site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetSite")
        result = action.execute(jobGroupID=self.id, conn=self.getDBConn(),
                                transaction=self.existingTransaction())
        return result

    def listJobIDs(self):
        """
        Returns a list of job IDs.

        Useful for times when threading the loading of jobGroups, where
        running loadData can overload UUID.
        """
        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid is None:
            self.load()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        jobIDList = []
        for jobID in result:
            jobIDList.append(jobID["id"])

        self.commitTransaction(existingTransaction)
        return jobIDList

    def commitBulk(self):
        """
        Creates jobs in a group instead of singly, as is done in
        jobGroup.commit()
        """
        myThread = threading.currentThread()

        if self.id == -1:
            # create() manages its own nested transaction; wrap it in a
            # top-level one so the group row is committed before the bulk
            # job insert below.
            myThread.transaction.begin()
            self.create()
            myThread.transaction.commit()

        existingTransaction = self.beginTransaction()

        listOfJobs = []
        for job in self.newjobs:
            # First do all the header stuff; skip jobs already created.
            if job["id"] is not None:
                continue

            job["jobgroup"] = self.id

            if job["name"] is None:
                job["name"] = makeUUID()

            listOfJobs.append(job)

        bulkAction = self.daofactory(classname="Jobs.New")
        result = bulkAction.execute(jobList=listOfJobs)

        # Use the results of the bulk commit to get the jobIDs
        fileDict = {}
        for job in listOfJobs:
            job['id'] = result[job['name']]
            fileDict[job['id']] = []
            # `inputFile` avoids shadowing the `file` builtin
            for inputFile in job['input_files']:
                fileDict[job['id']].append(inputFile['id'])

        maskAction = self.daofactory(classname="Masks.New")
        maskAction.execute(jobList=listOfJobs, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        fileAction = self.daofactory(classname="Jobs.AddFiles")
        fileAction.execute(jobDict=fileDict, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def getLocationsForJobs(self):
        """
        Gets a list of the locations that jobs can run at
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetLocationsForJobs")
        result = action.execute(id=self.id, conn=self.getDBConn(),
                                transaction=self.existingTransaction())
        return result

    def __str__(self):
        """
        __str__

        Print out some information about the jobGroup as if jobGroup
        inherited from dict()
        """
        d = {
            'id': self.id,
            'uid': self.uid,
            'subscription': self.subscription,
            'output': self.output,
            'jobs': self.jobs,
            'newjobs': self.newjobs
        }
        return str(d)
def testDataProcessing(self):
    """
    _testDataProcessing_

    Create a data processing workflow and verify it installs into WMBS
    correctly: workflow output maps, merge/cleanup/logcollect subscriptions
    and their splitting algorithms.
    """
    testWorkload = dataProcessingWorkload("TestWorkload", getTestArguments())
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
    testWMBSHelper.createSubscription()

    # Top-level processing workflow: two data outputs plus logArchive.
    procWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing")
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    # logArchive output of the processing task is never merged, so both
    # filesets point at the unmerged name.
    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # One merge workflow per data output module.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        # For a merge task both filesets resolve to the merged name.
        self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level subscription created by WMBSHelper for the input block.
    topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()

    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions feed off the unmerged output filesets.
    unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-outputRECORECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-outputALCARECOALCARECO")
    unmergedAlca.loadData()
    alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO")
    alcaMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # Cleanup subscriptions remove unmerged data once it is merged.
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscription for the processing task's logArchive output.
    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                     "Error: Wrong split algo.")

    # LogCollect subscriptions for each merge task's logArchive output.
    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/DataProcessingoutputRECORECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                     "Error: Wrong split algo.")

    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/DataProcessingoutputALCARECOALCARECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                     "Error: Wrong split algo.")
    return
def _generationTaskTest(self):
    """
    _generationTaskTest_

    Shared checks for a generation (Monte Carlo) workload installed into
    WMBS: output filesets of the Generation task and its merge tasks, plus
    the production/merge/cleanup/logcollect subscriptions.
    """
    # retrieve task from the installed workflow
    genTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation")
    genTask.load()
    # One data output module plus logArchive.
    self.assertEqual(len(genTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")

    # output modules
    goldenOutputMods = ["OutputA"]
    for o in goldenOutputMods:
        mergedOutput = genTask.outputMap[o][0]["merged_output_fileset"]
        unmergedOutput = genTask.outputMap[o][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name,
        )
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Generation/unmerged-%s" % o,
            "Error: Unmerged output fileset is wrong.",
        )

    # logArchive is never merged; both filesets carry the unmerged name.
    logArchOutput = genTask.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = genTask.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/Generation/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.",
    )
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/Generation/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.",
    )

    # One merge workflow per data output module.
    for o in goldenOutputMods:
        mergeTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationMerge%s" % o)
        mergeTask.load()
        self.assertEqual(len(mergeTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeTask.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeTask.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # For a merge task both filesets resolve to the merged name.
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
            "Error: Merged output fileset is wrong.",
        )
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
            "Error: Unmerged output fileset is wrong.",
        )
        logArchOutput = mergeTask.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeTask.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name,
        )
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
            "Error: LogArchive output fileset is wrong.",
        )

    # Top-level production subscription for the input block.
    topLevelFileset = Fileset(name="TestWorkload-Generation-SomeBlock")
    topLevelFileset.loadData()
    prodSubscription = Subscription(fileset=topLevelFileset, workflow=genTask)
    prodSubscription.loadData()
    self.assertEqual(prodSubscription["type"], "Production", "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased", "Error: Wrong split algo.")

    # Merge subscriptions feed off the unmerged output filesets.
    for o in goldenOutputMods:
        unmergedOutput = Fileset(name="/TestWorkload/Generation/unmerged-%s" % o)
        unmergedOutput.loadData()
        mergeTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationMerge%s" % o)
        mergeTask.load()
        mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeTask)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"],
            "ParentlessMergeBySize",
            "Error: Wrong split algo: %s" % mergeSubscription["split_algo"],
        )

    # Cleanup subscriptions remove unmerged data once it is merged.
    for o in goldenOutputMods:
        unmerged = Fileset(name="/TestWorkload/Generation/unmerged-%s" % o)
        unmerged.loadData()
        cleanupTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationCleanupUnmerged%s" % o)
        cleanupTask.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupTask)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.")

    # LogCollect subscription for the Generation task's logArchive output.
    genLogCollect = Fileset(name="/TestWorkload/Generation/unmerged-logArchive")
    genLogCollect.loadData()
    genLogCollectTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenLogCollect")
    genLogCollectTask.load()
    logCollectSub = Subscription(fileset=genLogCollect, workflow=genLogCollectTask)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")

    # LogCollect subscriptions for each merge task's logArchive output.
    for o in goldenOutputMods:
        mergeLogCollect = Fileset(name="/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o)
        mergeLogCollect.loadData()
        mergeLogCollectTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMerge%s/Generation%sMergeLogCollect" % (o, o),
        )
        mergeLogCollectTask.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectTask)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on.  We'll only test the skims here.
    """
    skimConfig = self.injectSkimConfig()
    dataProcArguments = getTestArguments()
    # One skim ("SomeSkim") over the RECO output, two skim output modules.
    dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                         "SkimInput": "outputRECORECO",
                                         "SkimSplitAlgo": "FileBased",
                                         "SkimSplitArgs": {"files_per_job": 1,
                                                           "include_parents": True},
                                         "ConfigCacheID": skimConfig,
                                         "Scenario": None}]
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
    testWMBSHelper.createSubscription()

    # Skim workflow: two skim outputs plus logArchive.
    skimWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive of the skim task is never merged.
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # One merge workflow per skim output module.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        # For a merge task both filesets resolve to the merged name.
        self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscription feeds off the merged RECO fileset.
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged")
    topLevelFileset.loadData()

    skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions for the skim outputs.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions for the unmerged skim outputs.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions for each skim-merge task's logArchive output.
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")
    return
class JobGroup(WMBSBase, WMJobGroup):
    """
    A group (set) of Jobs, persisted in WMBS.

    Extends the in-memory WMJobGroup with database persistence: the group
    row, its output fileset and its member jobs are stored through DAO
    classes in the "JobGroup"/"Jobs"/"Masks" namespaces.
    """

    def __init__(self, subscription=None, jobs=None, id=-1, uid=None,
                 location=None):
        WMBSBase.__init__(self)
        WMJobGroup.__init__(self, subscription=subscription, jobs=jobs)

        # id == -1 means "not yet created in the database"
        self.id = id
        self.lastUpdate = None
        self.uid = uid

        # Use identity comparison with None (PEP 8) instead of `!= None`
        if location is not None:
            self.setSite(location)
        return

    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        existingTransaction = self.beginTransaction()

        # overwrite base class self.output for WMBS fileset
        self.output = Fileset(name=makeUUID())
        self.output.create()

        if self.uid is None:
            self.uid = makeUUID()

        action = self.daofactory(classname="JobGroup.New")
        action.execute(self.uid, self.subscription["id"],
                       self.output.id, conn=self.getDBConn(),
                       transaction=self.existingTransaction())

        # The insert does not hand back the row id; look it up explicitly.
        self.id = self.exists()
        self.commitTransaction(existingTransaction)
        return

    def delete(self):
        """
        Remove a jobgroup from WMBS
        """
        deleteAction = self.daofactory(classname="JobGroup.Delete")
        deleteAction.execute(id=self.id, conn=self.getDBConn(),
                             transaction=self.existingTransaction())
        return

    def exists(self):
        """
        Does a jobgroup exist with id; if id is not provided, use the uid.
        Returns the database id.
        """
        if self.id != -1:
            action = self.daofactory(classname="JobGroup.ExistsByID")
            result = action.execute(id=self.id, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        else:
            action = self.daofactory(classname="JobGroup.Exists")
            result = action.execute(uid=self.uid, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        return result

    def load(self):
        """
        _load_

        Load all meta data associated with the JobGroup.  This includes the
        JobGroup id, uid, last_update time, subscription id and output
        fileset id.  Either the JobGroup id or uid must be specified for
        this to work.
        """
        existingTransaction = self.beginTransaction()

        if self.id > 0:
            loadAction = self.daofactory(classname="JobGroup.LoadFromID")
            result = loadAction.execute(self.id, conn=self.getDBConn(),
                                        transaction=self.existingTransaction())
        else:
            loadAction = self.daofactory(classname="JobGroup.LoadFromUID")
            result = loadAction.execute(self.uid, conn=self.getDBConn(),
                                        transaction=self.existingTransaction())

        self.id = result["id"]
        self.uid = result["uid"]
        self.lastUpdate = result["last_update"]

        # Subscription/output are only loaded shallowly here; loadData()
        # pulls in their full contents.
        self.subscription = Subscription(id=result["subscription"])
        self.subscription.load()

        self.output = Fileset(id=result["output"])
        self.output.load()

        self.jobs = []
        self.commitTransaction(existingTransaction)
        return

    def loadData(self):
        """
        _loadData_

        Load all data that is associated with the jobgroup.  This includes
        loading all the subscription information, the output fileset
        information and all the jobs that are associated with the group.
        """
        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid is None:
            self.load()

        self.subscription.loadData()
        self.output.loadData()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        self.jobs = []
        self.newjobs = []

        for jobID in result:
            newJob = Job(id=jobID["id"])
            newJob.loadData()
            self.add(newJob)

        # Move freshly-added jobs from newjobs into the committed set.
        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def commit(self):
        """
        _commit_

        Write any new jobs to the database, creating them in the database
        if necessary.
        """
        existingTransaction = self.beginTransaction()

        if self.id == -1:
            self.create()

        for j in self.newjobs:
            j.create(group=self)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def setSite(self, site_name=None):
        """
        Updates the jobGroup with a site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.SetSite")
        result = action.execute(site_name=site_name, jobGroupID=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())
        return result

    def getSite(self):
        """
        Retrieves the jobGroup's site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetSite")
        result = action.execute(jobGroupID=self.id, conn=self.getDBConn(),
                                transaction=self.existingTransaction())
        return result

    def listJobIDs(self):
        """
        Returns a list of job IDs.

        Useful for times when threading the loading of jobGroups, where
        running loadData can overload UUID.
        """
        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid is None:
            self.load()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        jobIDList = []
        for jobID in result:
            jobIDList.append(jobID["id"])

        self.commitTransaction(existingTransaction)
        return jobIDList

    def commitBulk(self):
        """
        Creates jobs in a group instead of singly, as is done in
        jobGroup.commit()
        """
        myThread = threading.currentThread()

        if self.id == -1:
            # create() manages its own nested transaction; wrap it in a
            # top-level one so the group row is committed before the bulk
            # job insert below.
            myThread.transaction.begin()
            self.create()
            myThread.transaction.commit()

        existingTransaction = self.beginTransaction()

        listOfJobs = []
        for job in self.newjobs:
            # First do all the header stuff; skip jobs already created.
            if job["id"] is not None:
                continue

            job["jobgroup"] = self.id

            if job["name"] is None:
                job["name"] = makeUUID()

            listOfJobs.append(job)

        bulkAction = self.daofactory(classname="Jobs.New")
        result = bulkAction.execute(jobList=listOfJobs)

        # Use the results of the bulk commit to get the jobIDs
        fileDict = {}
        for job in listOfJobs:
            job['id'] = result[job['name']]
            fileDict[job['id']] = []
            # `inputFile` avoids shadowing the `file` builtin
            for inputFile in job['input_files']:
                fileDict[job['id']].append(inputFile['id'])

        maskAction = self.daofactory(classname="Masks.New")
        maskAction.execute(jobList=listOfJobs, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        fileAction = self.daofactory(classname="Jobs.AddFiles")
        fileAction.execute(jobDict=fileDict, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def getLocationsForJobs(self):
        """
        Gets a list of the locations that jobs can run at
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetLocationsForJobs")
        result = action.execute(id=self.id, conn=self.getDBConn(),
                                transaction=self.existingTransaction())
        return result

    def __str__(self):
        """
        __str__

        Print out some information about the jobGroup as if jobGroup
        inherited from dict()
        """
        d = {
            'id': self.id,
            'uid': self.uid,
            'subscription': self.subscription,
            'output': self.output,
            'jobs': self.jobs,
            'newjobs': self.newjobs
        }
        return str(d)
    def _checkTask(self, task, taskConf):
        """
        _checkTask_

        Give the provided task instance the once over, make sure parentage
        is set correctly etc

        Verifies the task's input step, the output filesets of its workflow
        (merged and unmerged, per output module), the associated merge
        workflows, and — for the first task in the chain — the WMBS
        subscription type and splitting algorithm.
        """
        # If the task chains from another task, its input step must point
        # at the parent task's cmsRun1 step.
        if taskConf.has_key("InputTask"):
            inpTask = taskConf['InputTask']
            inpMod = taskConf['InputFromOutputModule']
            inpTaskPath = task.getPathName()
            inpTaskPath = inpTaskPath.replace(task.name(), "")
            inpTaskPath += "cmsRun1"
            self.assertEqual(task.data.input.inputStep, inpTaskPath)

        workflow = Workflow(name = self.workload.name(), task = task.getPathName())
        workflow.load()

        mods = outputModuleList(task)
        for mod in mods:
            filesets = workflow.outputMap[mod][0]
            merged = filesets['merged_output_fileset']
            unmerged = filesets['output_fileset']
            merged.loadData()
            unmerged.loadData()
            # logArchive output is never merged: its "merged" fileset is the
            # unmerged one; every other module gets a <task>Merge<mod> child.
            mergedset = task.getPathName() + "/" + task.name() + "Merge" + mod + "/merged-Merged"
            if mod == "logArchive":
                mergedset = task.getPathName() + "/unmerged-" + mod
            unmergedset = task.getPathName() + "/unmerged-" + mod
            self.failUnless(mergedset == merged.name)
            self.failUnless(unmergedset == unmerged.name)

            if mod == "logArchive":
                continue

            # Check the merge workflow attached to this output module.
            mergeTask = task.getPathName() + "/" + task.name() + "Merge" + mod
            mergeWorkflow = Workflow(name = self.workload.name(), task = mergeTask)
            mergeWorkflow.load()
            if not mergeWorkflow.outputMap.has_key("Merged"):
                msg = "Merge workflow does not contain a Merged output key"
                self.fail(msg)
            mrgFileset = mergeWorkflow.outputMap['Merged'][0]
            mergedFS = mrgFileset['merged_output_fileset']
            unmergedFS = mrgFileset['output_fileset']
            mergedFS.loadData()
            unmergedFS.loadData()
            # For a merge task both filesets are the merged one.
            self.assertEqual(mergedFS.name, mergedset)
            self.assertEqual(unmergedFS.name, mergedset)

            mrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['merged_output_fileset']
            umrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['output_fileset']
            mrgLogArch.loadData()
            umrgLogArch.loadData()
            archName = task.getPathName() + "/" + task.name() + "Merge" + mod + "/merged-logArchive"
            self.assertEqual(mrgLogArch.name, archName)
            self.assertEqual(umrgLogArch.name, archName)

        #
        # test subscriptions made by this task if it is the first task
        #
        if taskConf.has_key("InputDataset") or taskConf.has_key("PrimaryDataset"):
            taskFileset = Fileset(name = "%s-%s-SomeBlock" % (self.workload.name(), task.name()))
            taskFileset.loadData()

            taskSubscription = Subscription(fileset = taskFileset, workflow = workflow)
            taskSubscription.loadData()

            if taskConf.has_key("PrimaryDataset"):
                # generator type
                self.assertEqual(taskSubscription["type"], "Production",
                                 "Error: Wrong subscription type.")
                self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgorithm"],
                                 "Error: Wrong split algo.")
            if taskConf.has_key("InputDataset"):
                # processor type
                self.assertEqual(taskSubscription["type"], "Processing",
                                 "Wrong subscription type for processing first task")
                self.assertEqual(taskSubscription["split_algo"], taskConf['SplittingAlgorithm'],
                                 "Splitting algo mismatch")
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly: workflow outputs, merge workflows, merge/cleanup/logcollect
        subscriptions and their splitting algorithms.
        """
        arguments = getTestArguments()
        arguments["ProcConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"

        testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        # Two real output modules plus logArchive.
        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'lheproduction')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        # logArchive is never merged, so both filesets are the unmerged one.
        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Check the per-output-module merge workflows.
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            # For a merge task both filesets point at the merged output.
            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Top-level (processing) subscription.
        topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Production",
                         "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        # Merge subscriptions feed off the unmerged filesets.
        unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        # Cleanup subscriptions remove the unmerged filesets once merged.
        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        # LogCollect subscription for the processing task.
        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        # LogCollect subscriptions for the merge tasks.
        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
def testPromptRecoWithSkims(self): """ _testT1PromptRecoWithSkim_ Create a T1 Prompt Reconstruction workflow with PromptSkims and verify it installs into WMBS correctly. """ self.setupPromptSkimConfigObject() testArguments = getTestArguments() testArguments["PromptSkims"] = [self.promptSkim] testArguments["CouchURL"] = os.environ["COUCHURL"] testArguments["CouchDBName"] = "promptreco_t" testArguments["EnvPath"] = os.environ.get("EnvPath", None) testArguments["BinPath"] = os.environ.get("BinPath", None) testWorkload = promptrecoWorkload("TestWorkload", testArguments) testWorkload.setSpecUrl("somespec") testWorkload.setOwnerDetails("*****@*****.**", "T0") testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath = self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) recoWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco") recoWorkflow.load() self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1, "Error: Wrong number of WF outputs in the Reco WF.") goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() if goldenOutputMod != "write_ALCARECO": self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"] 
logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") alcaSkimWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim") alcaSkimWorkflow.load() self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1, "Error: Wrong number of WF outputs in the AlcaSkim WF.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") promptSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1") promptSkimWorkflow.load() 
self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6, "Error: Wrong number of WF outputs.") goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5"] for goldenOutputMod in goldenOutputMods: mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, 
"/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs %d." 
% len(mergeWorkflow.outputMap.keys())) mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5"] for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs %d." 
% len(mergeWorkflow.outputMap.keys())) mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock") topLevelFileset.loadData() recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow) recoSubscription.loadData() self.assertEqual(recoSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(recoSubscription["split_algo"], "EventBased", "Error: Wrong split algorithm. 
%s" % recoSubscription["split_algo"]) alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO") alcaRecoFileset.loadData() alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow) alcaSkimSubscription.loadData() self.assertEqual(alcaSkimSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"]) mergedRecoFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged") mergedRecoFileset.loadData() promptSkimSubscription = Subscription(fileset = mergedRecoFileset, workflow = promptSkimWorkflow) promptSkimSubscription.loadData() self.assertEqual(promptSkimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(promptSkimSubscription["split_algo"], "FileBased", "Error: Wrong split algorithm. %s" % promptSkimSubscription["split_algo"]) unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"] for unmergedOutput in unmergedOutputs: unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput) unmergedDataTier.loadData() dataTierMergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput) dataTierMergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) unmergedOutputs = [] for alcaProd in testArguments["AlcaSkims"]: unmergedOutputs.append("ALCARECOStream%s" % alcaProd) for unmergedOutput in unmergedOutputs: unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput) unmergedAlcaSkim.loadData() alcaSkimMergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput) alcaSkimMergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"]) unmergedOutputs = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5"] for unmergedOutput in unmergedOutputs: unmergedPromptSkim = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput) unmergedPromptSkim.loadData() promptSkimMergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % unmergedOutput) promptSkimMergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedPromptSkim, workflow = promptSkimMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"] for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5"] for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput) unmergedFileset.loadData() cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s" % unmergedOutput) cleanupWorkflow.load() cleanupSubscription = 
Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algorithm. %s" % cleanupSubscription["split_algo"]) recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive") recoLogCollect.loadData() recoLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/LogCollect") recoLogCollectWorkflow.load() logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive") alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect") alcaSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") promptSkimLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive") promptSkimLogCollect.loadData() promptSkimLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect") promptSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset = promptSkimLogCollect, workflow = promptSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") 
self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod) recoMergeLogCollect.loadData() recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) recoMergeLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod) alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) alcaSkimLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4", "fakeSkimOut5"] for goldenOutputMod in goldenOutputMods: promptSkimMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod) 
promptSkimMergeLogCollect.loadData() promptSkimMergeLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) promptSkimMergeLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset = promptSkimMergeLogCollect, workflow = promptSkimMergeLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") return
def testRepack(self):
    """
    _testRepack_

    Create a Repack workflow and verify that its workflows, filesets and
    subscriptions are installed into WMBS correctly.
    """
    testArguments = RepackWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))

    factory = RepackWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Repack", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Top-level Repack workflow: one output per configured output module
    # plus the logArchive output.
    repackWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Repack")
    repackWorkflow.load()
    self.assertEqual(len(repackWorkflow.outputMap.keys()), len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Repack WF.")

    goldenOutputMods = {"write_PrimaryDataset1_RAW": "RAW",
                        "write_PrimaryDataset2_RAW": "RAW"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        if goldenOutputMod != "write_PrimaryDataset1_RAW":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/Repack/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive is not merged, so both map entries point at the unmerged fileset.
    logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Per-output merge workflows.
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level subscription on the input fileset.
    topLevelFileset = Fileset(name="TestWorkload-Repack")
    topLevelFileset.loadData()

    repackSubscription = Subscription(fileset=topLevelFileset, workflow=repackWorkflow)
    repackSubscription.loadData()

    self.assertEqual(repackSubscription["type"], "Repack",
                     "Error: Wrong subscription type.")
    self.assertEqual(repackSubscription["split_algo"], "Repack",
                     "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])

    # Merge subscriptions on the unmerged output filesets.
    unmergedOutputs = {"write_PrimaryDataset1_RAW": "RAW",
                       "write_PrimaryDataset2_RAW": "RAW"}
    for unmergedOutput, tier in unmergedOutputs.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the unmerged output filesets.
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        # Fixed assertion message: this check is about the split algorithm,
        # not the subscription type.
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscription for the top-level task.
    repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
    repackLogCollect.loadData()
    repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                        task="/TestWorkload/Repack/LogCollect")
    repackLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=repackLogCollect, workflow=repackLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscriptions for each merge task.
    for goldenOutputMod, tier in goldenOutputMods.items():
        repackMergeLogCollect = Fileset(
            name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
        repackMergeLogCollect.loadData()
        repackMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                 task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" % (
                                                     goldenOutputMod, goldenOutputMod))
        repackMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=repackMergeLogCollect,
                                              workflow=repackMergeLogCollectWorkflow)
        logCollectSubscription.loadData()

        # BUG FIX: the original asserted on logCollectSub (the top-level
        # LogCollect subscription loaded above) instead of the
        # logCollectSubscription just loaded in this loop, so the per-merge
        # LogCollect subscriptions were never actually checked.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    return
def verifyDiscardRAW(self):
    """
    _verifyDiscardRAW_

    Verify that a workflow that discards the RAW was installed into WMBS
    correctly.
    """
    # Filesets the StepOneProc task and its merge tasks should have created;
    # loadData() fails if the fileset does not exist in WMBS.
    topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
    topLevelFileset.loadData()
    stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-DQMoutput")
    stepTwoUnmergedDQMFileset.loadData()
    stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RECODEBUGoutput")
    stepTwoUnmergedRECOFileset.loadData()
    stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-Merged")
    stepTwoMergedDQMFileset.loadData()
    stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-Merged")
    stepTwoMergedRECOFileset.loadData()
    stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
    stepTwoLogArchiveFileset.loadData()
    stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-logArchive")
    stepTwoMergeDQMLogArchiveFileset.loadData()
    stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-logArchive")
    stepTwoMergeRECOLogArchiveFileset.loadData()

    # The processing workflow must expose the RECODEBUG and DQM output
    # modules and map them onto the filesets loaded above.
    stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc")
    stepTwoWorkflow.load()
    self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    # Both logArchive map entries point at the same (unmerged) fileset.
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")

    # Processing subscription feeds off the top-level (input block) fileset.
    stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset)
    stepTwoSub.loadData()
    self.assertEqual(stepTwoSub["type"], "Processing",
                     "Error: Step two sub has wrong type.")

    # Each output module must route to exactly one destination.
    for outputMod in stepTwoWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Cleanup tasks subscribe to the unmerged filesets and produce no output.
    stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedDQMoutput")
    stepTwoCleanupDQMWorkflow.load()
    self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow,
                                        fileset = stepTwoUnmergedDQMFileset)
    stepTwoCleanupDQMSub.loadData()
    self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRECODEBUGoutput")
    stepTwoCleanupRECOWorkflow.load()
    self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow,
                                         fileset = stepTwoUnmergedRECOFileset)
    stepTwoCleanupRECOSub.loadData()
    self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    # LogCollect subscribes to the unmerged logArchive fileset.
    stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/LogCollect")
    stepTwoLogCollectWorkflow.load()
    self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect shouldn't have any output.")
    stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow,
                                        fileset = stepTwoLogArchiveFileset)
    stepTwoLogCollectSub.loadData()
    self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                     "Error: Step two sub has wrong type.")

    # RECODEBUG merge task: its Merged output maps (both merged and unmerged
    # entries) to the merged RECO fileset.
    stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput")
    stepTwoMergeRECOWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow,
                                       fileset = stepTwoUnmergedRECOFileset)
    stepTwoMergeRECOSub.loadData()
    self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # DQM merge task: same checks as for the RECODEBUG merge task.
    stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                       task = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput")
    stepTwoMergeDQMWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow,
                                      fileset = stepTwoUnmergedDQMFileset)
    stepTwoMergeDQMSub.loadData()
    self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Truncate a test workload at its merge task, re-inject it into WMBS as a
    resubmit workload and verify the resulting workflows, filesets and
    subscriptions.
    """
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                               ceName = 'site1', plugin = "TestPlugin")
    resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                               ceName = 'site2', plugin = "TestPlugin")

    testWorkload = self.createTestWMSpec()
    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
    testWMBSHelper.createSubscription()

    # Truncate the workload at the merge task; the truncated spec is injected
    # below as a new "ResubmitTestWorkload" whose top-level task is MergeTask.
    testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask",
                          "someserver", "somedatabase")

    testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath = self.workDir)
    testResubmitWMBSHelper.createSubscription()

    mergeWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name,
                                                      "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    cleanupWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/CleanupTask")
    cleanupWorkflow.load()
    self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name,
                                                        "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name,
                                                     "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # For the skim outputs, both the merged and unmerged map entries are
    # expected to resolve to the unmerged filesets.
    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()
    self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()

    mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testStoreResults(self):
    """
    _testStoreResults_

    Create a StoreResults workflow and verify it installs into WMBS
    correctly.
    """
    # NOTE(review): a testStoreResults with the same name is defined earlier
    # in this class; this later definition shadows it — confirm intent.
    wfArgs = StoreResultsWorkloadFactory.getTestArguments()
    wfArgs['CmsPath'] = "/uscmst1/prod/sw/cms"

    storeResultsFactory = StoreResultsWorkloadFactory()
    workload = storeResultsFactory.factoryWorkloadConstruction("TestWorkload", wfArgs)

    # Inject the workload into WMBS and create its subscriptions.
    wmbsHelper = WMBSHelper(workload, "StoreResults", "SomeBlock", cachepath=self.testDir)
    wmbsHelper.createTopLevelFileset()
    wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

    storeResultsWF = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults")
    storeResultsWF.load()

    # One "Merged" output module plus logArchive.
    self.assertEqual(len(storeResultsWF.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    for outputModule in ["Merged"]:
        outputEntry = storeResultsWF.outputMap[outputModule][0]
        mergedFileset = outputEntry["merged_output_fileset"]
        unmergedFileset = outputEntry["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()

        # Both map entries resolve to the merged fileset name.
        expectedName = "/TestWorkload/StoreResults/merged-%s" % outputModule
        self.assertEqual(mergedFileset.name, expectedName,
                         "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(unmergedFileset.name, expectedName,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedFileset.name)

    logArchiveEntry = storeResultsWF.outputMap["logArchive"][0]
    mergedLogArchive = logArchiveEntry["merged_output_fileset"]
    unmergedLogArchive = logArchiveEntry["output_fileset"]
    mergedLogArchive.loadData()
    unmergedLogArchive.loadData()

    self.assertEqual(mergedLogArchive.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchive.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Top-level subscription on the input block fileset.
    inputFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    inputFileset.loadData()

    topLevelSub = Subscription(fileset=inputFileset, workflow=storeResultsWF)
    topLevelSub.loadData()

    self.assertEqual(topLevelSub["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(topLevelSub["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def testDataProcessing(self):
    """
    _testDataProcessing_

    Create a data processing workflow and verify it installs into WMBS
    correctly.

    Check that we can drop an output module.
    """
    testArgs = getTestArguments()
    # RECOoutput is declared transient: no merge workflow should be created
    # for it (verified with assertRaises further down).
    testArgs['TransientOutputModules'] = ['RECOoutput']
    testWorkload = dataProcessingWorkload("TestWorkload", testArgs)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing")
    procWorkflow.load()

    # Two data output modules plus logArchive.
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["RECOoutput", "ALCARECOoutput"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        # For transient modules the "merged" entry stays pointed at the
        # unmerged fileset; otherwise it points at the merge task's output.
        if goldenOutputMod in testArgs["TransientOutputModules"]:
            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        else:
            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Per-output merge workflows — skipped for transient modules.
    for goldenOutputMod in goldenOutputMods:
        if goldenOutputMod in testArgs["TransientOutputModules"]:
            # No merge for this output module
            continue
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Processing subscription on the input block fileset.
    topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    # No merge workflow should exist in WMBS
    self.assertRaises(IndexError, recoMergeWorkflow.load)

    unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
    unmergedAlca.loadData()
    alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
    alcaMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # Both unmerged filesets (transient or not) get a cleanup subscription.
    for procOutput in ["RECOoutput", "ALCARECOoutput"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscription for the processing task.
    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription for the ALCARECO merge task.
    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testAnalysis(self):
    """
    _testAnalysis_

    Create an Analysis workload and verify it installs into WMBS correctly.
    """
    wfArgs = getTestArguments()
    wfArgs["CouchUrl"] = os.environ["COUCHURL"]
    wfArgs["CouchDBName"] = "analysis_t"
    wfArgs["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    wfArgs["ProcessingVersion"] = 'v1'

    analysisFactory = AnalysisWorkloadFactory()
    workload = analysisFactory("TestWorkload", wfArgs)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    # Inject the workload into WMBS.
    wmbsHelper = WMBSHelper(workload, "Analysis", "SomeBlock")
    wmbsHelper.createTopLevelFileset()
    wmbsHelper.createSubscription(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

    analysisWF = Workflow(name = "TestWorkload", task = "/TestWorkload/Analysis")
    analysisWF.load()

    self.assertEqual(len(analysisWF.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # Analysis does not have a merge task, so the merged and unmerged
    # logArchive entries both point at the unmerged fileset.
    logArchiveEntry = analysisWF.outputMap["logArchive"][0]
    mergedLogArchive = logArchiveEntry["merged_output_fileset"]
    unmergedLogArchive = logArchiveEntry["output_fileset"]
    mergedLogArchive.loadData()
    unmergedLogArchive.loadData()

    self.assertEqual(mergedLogArchive.name, "/TestWorkload/Analysis/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchive.name, "/TestWorkload/Analysis/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Likewise, the "output" module has only an unmerged fileset.
    outputEntry = analysisWF.outputMap["output"][0]
    unmergedFileset = outputEntry["output_fileset"]
    mergedFileset = outputEntry["merged_output_fileset"]
    unmergedFileset.loadData()
    mergedFileset.loadData()

    self.assertEqual(unmergedFileset.name, "/TestWorkload/Analysis/unmerged-output",
                     "Error: Unmerged output fileset is wrong: " + unmergedFileset.name)
    self.assertEqual(mergedFileset.name, "/TestWorkload/Analysis/unmerged-output",
                     "Error: Unmerged output fileset is wrong: " + mergedFileset.name)

    # Top-level Analysis subscription on the input block fileset.
    inputFileset = Fileset(name = "TestWorkload-Analysis-SomeBlock")
    inputFileset.loadData()

    analysisSub = Subscription(fileset = inputFileset, workflow = analysisWF)
    analysisSub.loadData()

    self.assertEqual(analysisSub["type"], "Analysis",
                     "Error: Wrong subscription type.")
    self.assertEqual(analysisSub["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription on the unmerged logArchive fileset.
    logCollectFileset = Fileset(name = "/TestWorkload/Analysis/unmerged-logArchive")
    logCollectFileset.loadData()
    logCollectWF = Workflow(name = "TestWorkload", task = "/TestWorkload/Analysis/LogCollect")
    logCollectWF.load()
    logCollectSub = Subscription(fileset = logCollectFileset, workflow = logCollectWF)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def testDependentReDigi(self):
    """
    _testDependentReDigi_

    Verify that a dependent ReDigi workflow that keeps and stages out RAW
    data is created and installed into WMBS correctly: check the filesets,
    output maps and subscriptions of all three processing steps and their
    merge, cleanup and LogCollect tasks.
    """
    defaultArguments = ReDigiWorkloadFactory.getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = injectReDigiConfigs(self.configDatabase)
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput"
    defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput"

    factory = ReDigiWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath = self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Load every fileset the three steps (and their merge tasks) should have
    # created in WMBS.
    topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
    topLevelFileset.loadData()

    stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
    stepOneUnmergedRAWFileset.loadData()
    stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged")
    stepOneMergedRAWFileset.loadData()
    stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
    stepOneLogArchiveFileset.loadData()
    stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive")
    stepOneMergeLogArchiveFileset.loadData()

    stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput")
    stepTwoUnmergedDQMFileset.loadData()
    stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput")
    stepTwoUnmergedRECOFileset.loadData()
    stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged")
    stepTwoMergedDQMFileset.loadData()
    stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged")
    stepTwoMergedRECOFileset.loadData()
    stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive")
    stepTwoLogArchiveFileset.loadData()
    stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive")
    stepTwoMergeDQMLogArchiveFileset.loadData()
    stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive")
    stepTwoMergeRECOLogArchiveFileset.loadData()

    stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule")
    stepThreeUnmergedAODFileset.loadData()
    stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged")
    stepThreeMergedAODFileset.loadData()
    stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive")
    stepThreeLogArchiveFileset.loadData()
    stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive")
    stepThreeMergeLogArchiveFileset.loadData()

    # Step one: the top-level processing task.
    stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc")
    stepOneWorkflow.load()
    self.assertEqual(stepOneWorkflow.wfType, 'reprocessing')
    self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(),
                    "Error: Step one missing output module.")
    self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(),
                    "Error: Step one missing output module.")
    self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepOneLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepOneLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id,
                     stepOneMergedRAWFileset.id,
                     "Error: RAWDEBUG output fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id,
                     stepOneUnmergedRAWFileset.id,
                     "Error: RAWDEBUG output fileset is wrong.")
    for outputMod in stepOneWorkflow.outputMap.keys():
        self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset)
    stepOneSub.loadData()
    self.assertEqual(stepOneSub["type"], "Processing",
                     "Error: Step one sub has wrong type.")

    # Step one: cleanup of the unmerged RAWDEBUG output.
    stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                      task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput")
    stepOneCleanupWorkflow.load()
    self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup should have no output.")
    stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow,
                                     fileset = stepOneUnmergedRAWFileset)
    stepOneCleanupSub.loadData()
    self.assertEqual(stepOneCleanupSub["type"], "Cleanup",
                     "Error: Step one sub has wrong type.")

    # Step one: LogCollect over the unmerged logArchive.
    stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/LogCollect")
    stepOneLogCollectWorkflow.load()
    self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect should have no output.")
    stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow,
                                        fileset = stepOneLogArchiveFileset)
    stepOneLogCollectSub.loadData()
    self.assertEqual(stepOneLogCollectSub["type"], "LogCollect",
                     "Error: Step one sub has wrong type.")

    # Step one: merge of the RAWDEBUG output.
    stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                    task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput")
    stepOneMergeWorkflow.load()
    self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(),
                    "Error: Step one merge missing output module.")
    self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(),
                    "Error: Step one merge missing output module.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepOneMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepOneMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepOneMergedRAWFileset.id,
                     "Error: RAWDEBUG merge output fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepOneMergedRAWFileset.id,
                     "Error: RAWDEBUG merge output fileset is wrong.")
    for outputMod in stepOneMergeWorkflow.outputMap.keys():
        self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow,
                                   fileset = stepOneUnmergedRAWFileset)
    stepOneMergeSub.loadData()
    self.assertEqual(stepOneMergeSub["type"], "Merge",
                     "Error: Step one sub has wrong type.")

    # Step two: processing over the merged RAWDEBUG output.
    stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc")
    stepTwoWorkflow.load()
    self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")
    stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset)
    stepTwoSub.loadData()
    self.assertEqual(stepTwoSub["type"], "Processing",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Step two: cleanup tasks for the unmerged DQM and RECODEBUG outputs.
    stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput")
    stepTwoCleanupDQMWorkflow.load()
    self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow,
                                        fileset = stepTwoUnmergedDQMFileset)
    stepTwoCleanupDQMSub.loadData()
    self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput")
    stepTwoCleanupRECOWorkflow.load()
    self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow,
                                         fileset = stepTwoUnmergedRECOFileset)
    stepTwoCleanupRECOSub.loadData()
    self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    # Step two: LogCollect.
    stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect")
    stepTwoLogCollectWorkflow.load()
    self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect shouldn't have any output.")
    stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow,
                                        fileset = stepTwoLogArchiveFileset)
    stepTwoLogCollectSub.loadData()
    self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                     "Error: Step two sub has wrong type.")

    # Step two: merge of the RECODEBUG output.
    stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput")
    stepTwoMergeRECOWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow,
                                       fileset = stepTwoUnmergedRECOFileset)
    stepTwoMergeRECOSub.loadData()
    self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Step two: merge of the DQM output.
    stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                       task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput")
    stepTwoMergeDQMWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow,
                                      fileset = stepTwoUnmergedDQMFileset)
    stepTwoMergeDQMSub.loadData()
    self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Step three: processing over the merged RECODEBUG output.
    stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                 task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc")
    stepThreeWorkflow.load()
    self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(),
                    "Error: Step three missing output module.")
    self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(),
                    "Error: Step three missing output module.")
    self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepThreeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepThreeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    # Fixed copy/paste failure messages: these assertions check the AOD output,
    # not RECODEBUG.
    self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id,
                     stepThreeMergedAODFileset.id,
                     "Error: AOD output fileset is wrong.")
    self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id,
                     stepThreeUnmergedAODFileset.id,
                     "Error: AOD output fileset is wrong.")
    stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset)
    stepThreeSub.loadData()
    self.assertEqual(stepThreeSub["type"], "Processing",
                     "Error: Step three sub has wrong type.")
    for outputMod in stepThreeWorkflow.outputMap.keys():
        self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Step three: cleanup of the unmerged AOD output.
    stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule")
    stepThreeCleanupWorkflow.load()
    self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup should have no output.")
    stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow,
                                       fileset = stepThreeUnmergedAODFileset)
    stepThreeCleanupSub.loadData()
    self.assertEqual(stepThreeCleanupSub["type"], "Cleanup",
                     "Error: Step three sub has wrong type.")

    # Step three: LogCollect.
    stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect")
    stepThreeLogCollectWorkflow.load()
    self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect should have no output.")
    stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow,
                                          fileset = stepThreeLogArchiveFileset)
    stepThreeLogCollectSub.loadData()
    self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect",
                     "Error: Step three sub has wrong type.")

    # Step three: merge of the AOD output.
    stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                      task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule")
    stepThreeMergeWorkflow.load()
    self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(),
                    "Error: Step three merge missing output module.")
    self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(),
                    "Error: Step three merge missing output module.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepThreeMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepThreeMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepThreeMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepThreeMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")
    stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow,
                                     fileset = stepThreeUnmergedAODFileset)
    stepThreeMergeSub.loadData()
    self.assertEqual(stepThreeMergeSub["type"], "Merge",
                     "Error: Step three sub has wrong type.")
    for outputMod in stepThreeMergeWorkflow.outputMap.keys():
        self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    return
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly: check the processing, merge, cleanup, skim, harvesting and
    LogCollect workflows, filesets and subscriptions.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = recoConfig

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # The skim merge output must carry the processing string in its LFN base.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                     tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                     Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing")
    procWorkflow.load()
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["RECOoutput", "DQMoutput"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Merge workflows for both processing outputs.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    unmergedDqm = Fileset(name = "/TestWorkload/DataProcessing/unmerged-DQMoutput")
    unmergedDqm.loadData()
    dqmMergeWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
    dqmMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedDqm, workflow = dqmMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    for procOutput in ["RECOoutput", "DQMoutput"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # Fixed copy/paste: this check was a verbatim duplicate of the RECOoutput
    # merge LogCollect above; it was clearly intended to cover the DQMoutput
    # merge LogCollect, which was otherwise never tested.
    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # The skim task that runs over the merged RECO output.
    skimWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = skimMergeLogCollect,
                                     workflow = skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # DQM harvesting over the merged DQM output.
    dqmWorkflow = Workflow(name = "TestWorkload",
                           task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
    topLevelFileset.loadData()
    dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                            task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = dqmHarvestLogCollect,
                                 workflow = dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def verifyKeepAOD(self):
    """
    _verifyKeepAOD_

    Verify that a workflow that only produces AOD in a single step was
    installed correctly into WMBS: the processing task, its merge task,
    the cleanup task for the unmerged AOD, and the LogCollect task.
    """
    # Load the top-level input fileset and every fileset the installed
    # workload should have created in WMBS.
    topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
    topLevelFileset.loadData()
    stepTwoUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-aodOutputModule")
    stepTwoUnmergedAODFileset.loadData()
    stepTwoMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-Merged")
    stepTwoMergedAODFileset.loadData()
    stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
    stepTwoLogArchiveFileset.loadData()
    stepTwoMergeAODLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-logArchive")
    stepTwoMergeAODLogArchiveFileset.loadData()

    # The processing workflow: its AOD output must be wired to the
    # unmerged fileset, with the merge task's output as the merged one.
    stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc")
    stepTwoWorkflow.load()
    self.assertTrue("aodOutputModule" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id,
                     stepTwoMergedAODFileset.id,
                     "Error: AOD output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id,
                     stepTwoUnmergedAODFileset.id,
                     "Error: AOD output fileset is wrong.")

    # The processing subscription feeds off the top-level input fileset.
    stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset)
    stepTwoSub.loadData()
    self.assertEqual(stepTwoSub["type"], "Processing",
                     "Error: Step two sub has wrong type.")

    # Each output module should map to exactly one destination fileset.
    for outputMod in stepTwoWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Cleanup task for the unmerged AOD: no outputs of its own.
    stepTwoCleanupAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedaodOutputModule")
    stepTwoCleanupAODWorkflow.load()
    self.assertEqual(len(stepTwoCleanupAODWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupAODSub = Subscription(workflow = stepTwoCleanupAODWorkflow,
                                        fileset = stepTwoUnmergedAODFileset)
    stepTwoCleanupAODSub.loadData()
    self.assertEqual(stepTwoCleanupAODSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    # LogCollect task subscribed to the processing logArchive fileset.
    stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/LogCollect")
    stepTwoLogCollectWorkflow.load()
    self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect shouldn't have any output.")
    stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow,
                                        fileset = stepTwoLogArchiveFileset)
    stepTwoLogCollectSub.loadData()
    self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                     "Error: Step two sub has wrong type.")

    # Merge task for the AOD: merged and unmerged output both point at the
    # merged fileset, plus its own logArchive fileset.
    stepTwoMergeAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                       task = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule")
    stepTwoMergeAODWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeAODWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeAODWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeAODLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeAODLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")

    # The merge subscription consumes the unmerged AOD fileset.
    stepTwoMergeAODSub = Subscription(workflow = stepTwoMergeAODWorkflow,
                                      fileset = stepTwoUnmergedAODFileset)
    stepTwoMergeAODSub.loadData()
    self.assertEqual(stepTwoMergeAODSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeAODWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeAODWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    return
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on.  We'll test the skims and DQMHarvest here.
    """
    # Build a ReReco workload with one skim (two skim output modules).
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = getTestArguments()
    dataProcArguments['ProcessingString'] = 'ProcString'
    dataProcArguments['ConfigCacheID'] = recoConfig
    dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                         "SkimInput": "RECOoutput",
                                         "SkimSplitAlgo": "FileBased",
                                         "SkimSplitArgs": {"files_per_job": 1,
                                                           "include_parents": True},
                                         "ConfigCacheID": skimConfig,
                                         "Scenario": None}]
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    # The skim merge task must carry the ProcessingString in its merged LFN base.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                     tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                     Merged.mergedLFNBase,
                     '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # The skim workflow should have two skim outputs plus logArchive.
    skimWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Each skim output gets its own merge workflow; for a merge task both
    # the merged and unmerged output point at the merged fileset.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscription runs off the merged RECO output.
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions over the unmerged skim output.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions over the same unmerged skim output.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions over each merge task's logArchive fileset.
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = skimMergeLogCollect,
                                     workflow = skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # DQM harvesting runs off the merged DQM output.
    dqmWorkflow = Workflow(name = "TestWorkload",
                           task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
    topLevelFileset.loadData()
    dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # LogCollect for the harvesting task's logArchive output.
    dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                            task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = dqmHarvestLogCollect,
                                 workflow = dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testPrivateMC(self):
    """
    _testPrivateMC_

    Create a PrivateMC workload, install it into WMBS, and verify the
    resulting workflow outputs, filesets and subscriptions.
    """
    # Assemble the workload arguments on top of the test defaults.
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "privatemc_t"
    args["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    args["ProcessingVersion"] = 1

    factory = PrivateMCWorkloadFactory()
    workload = factory("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    # Install the workload into WMBS.
    helper = WMBSHelper(workload, "PrivateMC", "SomeBlock", cachepath = self.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/PrivateMC")
    procWorkflow.load()

    outputMap = procWorkflow.outputMap
    self.assertEqual(len(outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs: %s" % len(outputMap.keys()))

    # There is no merge task, so merged and unmerged logArchive filesets
    # are the same unmerged fileset.
    mergedLogArch = outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArch = outputMap["logArchive"][0]["output_fileset"]
    mergedLogArch.loadData()
    unmergedLogArch.loadData()
    self.assertEqual(mergedLogArch.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArch.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Likewise, both destinations of each data output are unmerged.
    for modName in ["OutputA", "OutputB"]:
        merged = outputMap[modName][0]["merged_output_fileset"]
        unmerged = outputMap[modName][0]["output_fileset"]
        merged.loadData()
        unmerged.loadData()
        self.assertEqual(merged.name, "/TestWorkload/PrivateMC/unmerged-%s" % modName,
                         "Error: Merged output fileset is wrong: %s" % merged.name)
        self.assertEqual(unmerged.name, "/TestWorkload/PrivateMC/unmerged-%s" % modName,
                         "Error: Unmerged output fileset is wrong.")

    # The PrivateMC subscription sits on the top-level input fileset.
    topLevelFileset = Fileset(name = "TestWorkload-PrivateMC-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "PrivateMC",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # LogCollect is subscribed to the unmerged logArchive fileset.
    logCollectFileset = Fileset(name = "/TestWorkload/PrivateMC/unmerged-logArchive")
    logCollectFileset.loadData()
    logCollectWorkflow = Workflow(name = "TestWorkload",
                                  task = "/TestWorkload/PrivateMC/LogCollect")
    logCollectWorkflow.load()
    logCollectSub = Subscription(fileset = logCollectFileset, workflow = logCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on.  This tests run on unmerged RECO output.
    """
    # Build a ReReco workload whose RECO output module is transient, so
    # the skim attaches directly to the unmerged RECO fileset.
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # The skim merge task must carry the ProcessingString in its merged LFN base.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
                     SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                     Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # The skim hangs directly off DataProcessing (not off a RECO merge
    # task) because RECOoutput is transient.
    skimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    # Output-module name -> data tier; fileset keys are module + tier.
    goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Each skim output gets its own merge workflow; both destinations of
    # the merge point at the merged fileset.
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscription sits on the unmerged RECO fileset.
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions over the unmerged skim output.
    for skimOutput, tier in goldenOutputMods.items():
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions over the same unmerged skim output.
    for skimOutput, tier in goldenOutputMods.items():
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmerged%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions over each merge task's logArchive fileset.
    for skimOutput in goldenOutputMods:
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (
                                                   skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
    return
def testPromptRecoWithSkims(self): """ _testT1PromptRecoWithSkim_ Create a T1 Prompt Reconstruction workflow with PromptSkims and verify it installs into WMBS correctly. """ testArguments = PromptRecoWorkloadFactory.getTestArguments() testArguments["CouchURL"] = os.environ["COUCHURL"] testArguments["EnableHarvesting"] = True factory = PromptRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments) testWorkload.setSpecUrl("somespec") testWorkload.setOwnerDetails("*****@*****.**", "T0") testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco") recoWorkflow.load() self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1, "Error: Wrong number of WF outputs in the Reco WF.") goldenOutputMods = {"write_RECO": "RECO", "write_ALCARECO": "ALCARECO", "write_AOD": "AOD", "write_DQM": "DQM"} for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"] unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() if goldenOutputMod != "write_ALCARECO": self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() 
self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim") alcaSkimWorkflow.load() self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1, "Error: Wrong number of WF outputs in the AlcaSkim WF.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: fset = goldenOutputMod + "ALCARECO" mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"] unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") dqmWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged") dqmWorkflow.load() logArchOutput = 
dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"} for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, 
"/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys())) mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock") topLevelFileset.loadData() recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow) recoSubscription.loadData() self.assertEqual(recoSubscription["type"], "Processing", "Error: Wrong subscription 
type.") self.assertEqual(recoSubscription["split_algo"], "EventAwareLumiBased", "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"]) alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO") alcaRecoFileset.loadData() alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow) alcaSkimSubscription.loadData() self.assertEqual(alcaSkimSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(alcaSkimSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"]) mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM") mergedDQMFileset.loadData() dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow) dqmSubscription.loadData() self.assertEqual(dqmSubscription["type"], "Harvesting", "Error: Wrong subscription type.") self.assertEqual(dqmSubscription["split_algo"], "Harvest", "Error: Wrong split algo.") unmergedOutputs = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"} for unmergedOutput, tier in unmergedOutputs.items(): fset = unmergedOutput + tier unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset) unmergedDataTier.loadData() dataTierMergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput) dataTierMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) unmergedOutputs = [] for alcaProd in testArguments["AlcaSkims"]: unmergedOutputs.append("ALCARECOStream%s" % alcaProd) for unmergedOutput in unmergedOutputs: unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput) unmergedAlcaSkim.loadData() alcaSkimMergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput) alcaSkimMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"]) goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"} for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset) unmergedFileset.loadData() cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() 
cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive") recoLogCollect.loadData() recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect") recoLogCollectWorkflow.load() logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive") alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect") alcaSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod) recoMergeLogCollect.loadData() recoMergeLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % ( goldenOutputMod, goldenOutputMod)) recoMergeLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset=recoMergeLogCollect, workflow=recoMergeLogCollectWorkflow) logCollectSubscription.loadData() 
self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: alcaSkimLogCollect = Fileset( name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod) alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % ( goldenOutputMod, goldenOutputMod)) alcaSkimLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") dqmHarvestLogCollect = Fileset( name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive") dqmHarvestLogCollect.loadData() dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect") dqmHarvestLogCollectWorkflow.load() logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Create a MonteCarloFromGEN workload, install it into WMBS and verify
    that the resulting workflows, filesets and subscriptions are correct.
    """
    arguments = getTestArguments()
    arguments["ConfigCacheID"] = self.injectConfig()
    arguments["CouchDBName"] = "mclhe_t"

    testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    # Install the workload into WMBS: top-level fileset plus subscriptions.
    testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN")
    procWorkflow.load()

    # Two data outputs plus the logArchive output.
    self.assertEqual(len(procWorkflow.outputMap), 3,
                     "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'lheproduction')

    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Each merge task exposes a Merged output and a logArchive output.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level production subscription.
    topLevelFileset = Fileset(name="TestWorkload-MonteCarloFromGEN-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()

    self.assertEqual(procSubscription["type"], "Production",
                     "Error: Wrong subscription type: %s" % procSubscription["type"])
    self.assertEqual(procSubscription["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions feeding off the unmerged data outputs.
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        mergeSubscription = Subscription(fileset=unmergedFileset, workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the same unmerged outputs.
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmerged = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
        unmerged.loadData()

        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()

        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscription for the processing task's logArchive fileset.
    procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
    procLogCollect.loadData()

    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/MonteCarloFromGEN/LogCollect")
    procLogCollectWorkflow.load()

    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscriptions for each merge task's logArchive fileset.
    for goldenOutputMod in goldenOutputMods:
        mergeLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod)
        mergeLogCollect.loadData()

        mergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                           task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/MonteCarloFromGEN%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        mergeLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")