def testGetOutputParentLFNs(self):
    """
    _testGetOutputParentLFNs_

    Verify that getOutputDBSParentLFNs() returns the correct parent LFNs:
    merged input files are reported as their own DBS parents, while unmerged
    input files are resolved to their merged parent files.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # A and B are merged: they should be their own DBS parents.
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10, merged=True)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10, merged=True)
    # C and D are unmerged: their DBS parents must be resolved through the
    # file parentage established below (E -> C, F -> D).
    testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10, merged=False)
    testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10, merged=False)
    testFileE = File(lfn="/this/is/a/lfnE", size=1024, events=10, merged=True)
    testFileF = File(lfn="/this/is/a/lfnF", size=1024, events=10, merged=True)

    for testFile in [testFileA, testFileB, testFileC,
                     testFileD, testFileE, testFileF]:
        testFile.create()

    # Establish parentage: merged E is the parent of unmerged C,
    # merged F is the parent of unmerged D.
    testFileE.addChild(testFileC["lfn"])
    testFileF.addChild(testFileD["lfn"])

    testJobA = Job(name="TestJob", files=[testFileA, testFileB])
    testJobA["couch_record"] = "somecouchrecord"
    testJobA["location"] = "test.site.ch"
    testJobA.create(group=testJobGroup)
    testJobA.associateFiles()

    testJobB = Job(name="TestJobB", files=[testFileC, testFileD])
    testJobB["couch_record"] = "somecouchrecord"
    testJobB["location"] = "test.site.ch"
    testJobB.create(group=testJobGroup)
    testJobB.associateFiles()

    # Use unittest assertions instead of bare "assert" statements: bare
    # asserts are stripped under "python -O" and the original duplicated
    # check loops had inconsistent failure messages.  Sorting both sides
    # makes the comparison order-independent, like the original
    # membership-and-remove loops were.
    self.assertEqual(sorted(testJobA.getOutputDBSParentLFNs()),
                     ["/this/is/a/lfnA", "/this/is/a/lfnB"],
                     "Error: Wrong parent LFNs for job A.")
    self.assertEqual(sorted(testJobB.getOutputDBSParentLFNs()),
                     ["/this/is/a/lfnE", "/this/is/a/lfnF"],
                     "Error: Wrong parent LFNs for job B.")
    return
def stuffWMBS(self, workflowURL, name):
    """
    _stuffWMBS_

    Insert dummy filesets, files, a workflow and subscriptions into WMBS
    to test merge job creation.  Eleven files spanning two runs (run 1
    lumis 45/46 and run 2 lumi 46, the last file oversized) are added to
    both the "mergeFileset" (fed to a ParentlessMergeBySize subscription)
    and a "bogusFileset", so splitting is exercised against files that
    belong to more than one fileset.

    NOTE(review): the previous docstring claimed jobs/jobgroups were
    injected here, but none are created; it has been corrected.
    """
    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="s1", pnn="somese.cern.ch")

    # DAO is instantiated but not executed here; kept for parity with
    # the original fixture.
    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    mergeFileset = Fileset(name="mergeFileset")
    mergeFileset.create()
    bogusFileset = Fileset(name="bogusFileset")
    bogusFileset.create()

    mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                             name=name, task="/TestWorkload/ReReco")
    mergeWorkflow.create()

    mergeSubscription = Subscription(fileset=mergeFileset,
                                     workflow=mergeWorkflow,
                                     split_algo="ParentlessMergeBySize")
    mergeSubscription.create()
    bogusSubscription = Subscription(fileset=bogusFileset,
                                     workflow=mergeWorkflow,
                                     split_algo="ParentlessMergeBySize")
    # Fix: the bogus subscription was constructed but never persisted,
    # which left the bogus fileset without any subscription in WMBS and
    # defeated its purpose in the test.
    bogusSubscription.create()

    def _createFile(lfn, firstEvent, run, lumi, size=1024):
        """Create and persist one WMBS file at somese.cern.ch."""
        newFile = File(lfn=lfn, size=size, events=1024,
                       first_event=firstEvent,
                       locations=set(["somese.cern.ch"]))
        newFile.addRun(Run(run, *[lumi]))
        newFile.create()
        return newFile

    # Same files as before: file1-4 in run 1 / lumi 45, fileA-C in
    # run 1 / lumi 46, fileI-IV in run 2 / lumi 46 with fileIV oversized.
    testFiles = [
        _createFile("file1", 0, 1, 45),
        _createFile("file2", 1024, 1, 45),
        _createFile("file3", 2048, 1, 45),
        _createFile("file4", 3072, 1, 45),
        _createFile("fileA", 0, 1, 46),
        _createFile("fileB", 1024, 1, 46),
        _createFile("fileC", 2048, 1, 46),
        _createFile("fileI", 0, 2, 46),
        _createFile("fileII", 1024, 2, 46),
        _createFile("fileIII", 2048, 2, 46),
        _createFile("fileIV", 3072, 2, 46, size=1024 * 1000 * 1000),
    ]

    # Renamed loop variable: "file" shadowed the builtin.
    for wmbsFile in testFiles:
        mergeFileset.addFile(wmbsFile)
        bogusFileset.addFile(wmbsFile)

    mergeFileset.commit()
    bogusFileset.commit()
    return
myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"]) for lumi in dbsResult["LumiList"]: myRun.lumis.append(lumi["LumiSectionNumber"]) myFile.addRun(myRun) myFile.create() inputFileset.addFile(myFile) if len(inputFileset) < 1: raise Exception("No files were selected!") inputFileset.commit() inputFileset.markOpen(False) return myThread = threading.currentThread() myThread.transaction.begin() for workloadTask in workload.taskIterator(): inputFileset = Fileset(name=workloadTask.getPathName()) inputFileset.create() inputDataset = workloadTask.inputDataset() inputDatasetPath = "/%s/%s/%s" % ( inputDataset.primary, inputDataset.processed, inputDataset.tier) injectFilesFromDBS(inputFileset, inputDatasetPath, options.RunWhitelist) myWMBSHelper = WMBSHelper(workload) myWMBSHelper._createSubscriptionsInWMBS(workloadTash.getPathName()) myThread.transaction.commit()
def _commonMonteCarloTest(self):
    """
    Retrieve the workload from WMBS and test all its properties.

    Checks, for a Monte Carlo "Production" workload already installed in
    WMBS: the production workflow's output filesets (OutputA, OutputB and
    logArchive), each merge workflow's output filesets, and the
    production/merge/cleanup/log-collect subscriptions with their types
    and splitting algorithms.
    """
    prodWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production")
    prodWorkflow.load()

    # Three outputs expected: OutputA, OutputB and logArchive.
    self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["OutputA", "OutputB"]
    for goldenOutputMod in goldenOutputMods:
        # Each output module maps to a merged and an unmerged fileset.
        mergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Production/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    # logArchive output is never merged: both entries point at the same
    # unmerged fileset.
    logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify each merge workflow's outputs (Merged + logArchive).
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        # For merge tasks the merged and unmerged filesets coincide.
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level production subscription: EventBased splitting.
    topLevelFileset = Fileset(name="TestWorkload-Production-SomeBlock")
    topLevelFileset.loadData()
    prodSubscription = Subscription(fileset=topLevelFileset, workflow=prodWorkflow)
    prodSubscription.loadData()
    self.assertEqual(prodSubscription["type"], "Production",
                     "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions on each unmerged output fileset.
    for outputName in ["OutputA", "OutputB"]:
        unmergedOutput = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
        unmergedOutput.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % outputName)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the same unmerged filesets.
    for outputName in ["OutputA", "OutputB"]:
        unmerged = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscription for the production task itself.
    procLogCollect = Fileset(name="/TestWorkload/Production/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Production/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscriptions for each merge task.
    for outputName in ["OutputA", "OutputB"]:
        mergeLogCollect = Fileset(name="/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName)
        mergeLogCollect.loadData()
        mergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                           task="/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (
                                               outputName, outputName))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def testExpress(self):
    """
    _testExpress_

    Create an Express workflow and verify it installs into WMBS correctly:
    output filesets for the Express, AlcaSkim, DQM-harvest and merge
    workflows, plus the Express/Merge/Harvest/Cleanup/LogCollect
    subscriptions with their types and splitting algorithms.
    """
    testArguments = ExpressWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))
    testArguments['RecoCMSSWVersion'] = "CMSSW_9_0_0"

    factory = ExpressWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Express", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    expressWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Express")
    expressWorkflow.load()
    # One output per configured stream plus logArchive.
    self.assertEqual(len(expressWorkflow.outputMap.keys()),
                     len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Express WF.")

    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_ALCARECO": "ALCARECO",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = expressWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = expressWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # The ALCARECO stream feeds the AlcaSkim task instead of a merge
        # task, so it has no ExpressMerge merged fileset to check.
        if goldenOutputMod != "write_StreamExpress_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Express/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = expressWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = expressWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Express/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Express/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    alcaSkimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = {"ALCARECOStreamPromptCalibProd": "ALCAPROMPT",
                        "ALCARECOStreamTkAlMinBias": "ALCARECO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # AlcaSkim outputs are not merged: both filesets are the unmerged one.
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # The two DQM-harvest workflows only produce logArchive output.
    for dqmString in ["ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                      "ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged"]:
        dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s" % dqmString
        dqmWorkflow = Workflow(name="TestWorkload", task=dqmTask)
        dqmWorkflow.load()
        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "%s/unmerged-logArchive" % dqmTask,
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "%s/unmerged-logArchive" % dqmTask,
                         "Error: LogArchive output fileset is wrong.")

    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Express/ExpressMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-Express")
    topLevelFileset.loadData()
    expressSubscription = Subscription(fileset=topLevelFileset, workflow=expressWorkflow)
    expressSubscription.loadData()
    self.assertEqual(expressSubscription["type"], "Express",
                     "Error: Wrong subscription type.")
    self.assertEqual(expressSubscription["split_algo"], "Express",
                     "Error: Wrong split algorithm. %s" % expressSubscription["split_algo"])

    alcaRecoFileset = Fileset(name="/TestWorkload/Express/unmerged-write_StreamExpress_ALCARECOALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Express",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "ExpressMerge",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    mergedDQMFileset = Fileset(name="/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/merged-MergedDQMIO")
    mergedDQMFileset.loadData()
    # NOTE(review): dqmWorkflow deliberately carries over from the last
    # iteration of the DQM-harvest loop above (the Periodic harvest task).
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    unmergedOutputs = {"write_PrimaryDataset1_FEVT": "FEVT",
                       "write_StreamExpress_DQMIO": "DQMIO"}
    for unmergedOutput, tier in unmergedOutputs.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Express/ExpressMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier,
                                         workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ExpressMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                        "write_StreamExpress_ALCARECO": "ALCARECO",
                        "write_StreamExpress_DQMIO": "DQMIO"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Express/ExpressCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        # Fix: this assertion previously reused the "Wrong subscription
        # type" message although it checks the splitting algorithm.
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    expressLogCollect = Fileset(name="/TestWorkload/Express/unmerged-logArchive")
    expressLogCollect.loadData()
    expressLogCollectWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Express/ExpressLogCollect")
    expressLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=expressLogCollect,
                                 workflow=expressLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    alcaSkimLogCollect = Fileset(name="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                 workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    goldenOutputMods = ["write_PrimaryDataset1_FEVT", "write_StreamExpress_DQMIO"]
    for goldenOutputMod in goldenOutputMods:
        expressMergeLogCollect = Fileset(name="/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod)
        expressMergeLogCollect.loadData()
        expressMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                  task="/TestWorkload/Express/ExpressMerge%s/Express%sMergeLogCollect" % (
                                                      goldenOutputMod, goldenOutputMod))
        expressMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=expressMergeLogCollect,
                                              workflow=expressMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Fix: the assertions below previously tested the stale
        # "logCollectSub" object from the section above instead of the
        # subscription loaded in this iteration, so this loop verified
        # nothing.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    for dqmStrings in [("ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                        "ExpressMergewrite_StreamExpress_DQMIOMergedEndOfRunDQMHarvestLogCollect"),
                       ("ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged",
                        "ExpressMergewrite_StreamExpress_DQMIOMergedPeriodicDQMHarvestLogCollect")]:
        dqmFileset = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/unmerged-logArchive" % \
                     dqmStrings[0]
        dqmHarvestLogCollect = Fileset(name=dqmFileset)
        dqmHarvestLogCollect.loadData()
        dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/%s" % dqmStrings
        dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload", task=dqmTask)
        dqmHarvestLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                     workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on.  This test runs on unmerged RECO output: RECOoutput
    is declared transient, so the skim task subscribes directly to the
    unmerged RECO fileset.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    # Attach one skim ("SomeSkim") reading the transient RECOoutput module.
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    # Transient output modules are not merged/saved; the skim consumes
    # them unmerged.
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # Sanity-check the merged LFN base built from the processing string.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
                         SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                         Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    skimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()
    # Two skim outputs plus logArchive.
    self.assertEqual(len(skimWorkflow.outputMap), 3, "Error: Wrong number of WF outputs.")

    goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive output is never merged: both entries point at the same
    # unmerged fileset.
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify each skim-merge workflow's outputs (Merged<tier> + logArchive).
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscribes to the UNMERGED RECO fileset (RECOoutput is
    # transient), not a merged one.
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions on each unmerged skim output fileset.
    for skimOutput, tier in viewitems(goldenOutputMods):
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions on the same unmerged filesets.
    for skimOutput, tier in viewitems(goldenOutputMods):
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmerged%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions for each skim-merge task.
    for skimOutput in goldenOutputMods:
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (
                                                   skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Truncate a workload at /TestWorkload/ProcessingTask/MergeTask into a
    resubmission workload and verify that the resulting WMBS workflows,
    output filesets and subscriptions are created correctly.
    """
    # Two sites so the resubmitted work has somewhere to run.
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2', pnn='goodse2.cern.ch',
                               ceName='site2', plugin="TestPlugin")

    # Install the original (untruncated) workload into WMBS first.
    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset)

    # Truncate: MergeTask (and its subtree) becomes the top of a new
    # "ResubmitTestWorkload" workload.
    testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask",
                          "someserver", "somedatabase")

    # create the subscription for multiple top task (MergeTask and CleanupTask for the same block)
    for task in testWorkload.getTopLevelTask():
        testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir)
        testResubmitWMBSHelper.createTopLevelFileset()
        testResubmitWMBSHelper._createSubscriptionsInWMBS(task, testResubmitWMBSHelper.topLevelFileset)

    # The merge task of the truncated workload: check owner, spec path and
    # that it has exactly one output (the merged output).
    mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec,
                     os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # The skim task under the merge task: two outputs (SkimOutputA/B), each
    # with merged and unmerged filesets pointing at the same unmerged name.
    skimWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec,
                     os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()
    # NOTE(review): for skim outputs both the "merged" and "unmerged" map
    # entries resolve to the unmerged-* fileset names below.
    self.assertEqual(mergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # Subscription checks: the merge sub sits on the new top-level fileset
    # for block "SomeBlock2" and carries no white/black list.
    topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()

    mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    # The skim sub consumes the merge task's unmerged output fileset.
    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testB_NoFileSplitNoHardLimit(self):
    """
    _testB_NoFileSplitNoHardLimit_

    In this case we don't split on file boundaries, check different
    combination of files make sure we make the most of the splitting,
    e.g. include many zero event files in a single job.

    Exercises EventAwareLumiBased splitting with
    halt_job_on_file_boundaries=False, both with and without splitOnRun,
    and verifies the job masks and resource estimates.
    """
    splitter = SplitterFactory()

    # Create 100 files with 7 lumi per file and 0 events per lumi on average.
    testSubscription = self.createSubscription(nFiles=100, lumisPerFile=7,
                                               twoSites=False, nEventsPerFile=0)
    jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription)

    # First test, the optimal settings are 360 events per job.
    # As we have files with 0 events per lumi, this will configure the
    # splitting to a single job containing all files.
    jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False,
                           events_per_job=360, performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 1, "There should be 1 job")
    self.assertEqual(len(jobs[0]['input_files']), 100, "All 100 files must be in the job")
    self.assertEqual(jobs[0]['estimatedMemoryUsage'], 2300)
    self.assertEqual(jobs[0]['estimatedDiskUsage'], 0)
    self.assertEqual(jobs[0]['estimatedJobTime'], 0)

    # Create 7 files, each one with different lumi/event distributions
    # (events, first run, first lumi, location — see self.createFile).
    testFileset = Fileset(name="FilesetA")
    testFileset.create()
    testFileA = self.createFile("/this/is/file1", 250, 0, 5, "T2_CH_CERN")
    testFileB = self.createFile("/this/is/file2", 600, 1, 1, "T2_CH_CERN")
    testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "T2_CH_CERN")
    testFileD = self.createFile("/this/is/file4", 100, 3, 1, "T2_CH_CERN")
    testFileE = self.createFile("/this/is/file5", 30, 4, 1, "T2_CH_CERN")
    testFileF = self.createFile("/this/is/file6", 10, 5, 1, "T2_CH_CERN")
    testFileG = self.createFile("/this/is/file7", 151, 6, 3, "T2_CH_CERN")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)
    testFileset.addFile(testFileD)
    testFileset.addFile(testFileE)
    testFileset.addFile(testFileF)
    testFileset.addFile(testFileG)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()
    jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription)

    # Optimal settings are: jobs with 150 events per job.
    # This means, the first file must be splitted in 3 lumis per job which
    # would leave room for another lumi in the second job, but the second
    # file has a lumi too big for that. The 3rd job only contains the second
    # file, the fourth and fifth job split the third file.
    jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False,
                           events_per_job=150, performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 7, "7 jobs must be in the jobgroup")
    self.assertEqual(jobs[0]["mask"].getRunAndLumis(), {0: [[0, 2]]},
                     "Wrong mask for the first job")
    self.assertEqual(jobs[0]["estimatedJobTime"], 150 * 12)
    self.assertEqual(jobs[0]["estimatedDiskUsage"], 150 * 400)
    self.assertEqual(jobs[1]["mask"].getRunAndLumis(), {0: [[3, 4]]},
                     "Wrong mask for the second job")
    self.assertEqual(jobs[1]["estimatedJobTime"], 100 * 12)
    self.assertEqual(jobs[1]["estimatedDiskUsage"], 100 * 400)
    self.assertEqual(jobs[2]["mask"].getRunAndLumis(), {1: [[1, 1]]},
                     "Wrong mask for the third job")
    self.assertEqual(jobs[2]["estimatedJobTime"], 600 * 12)
    self.assertEqual(jobs[2]["estimatedDiskUsage"], 600 * 400)
    self.assertEqual(jobs[3]["mask"].getRunAndLumis(), {2: [[4, 4]]},
                     "Wrong mask for the fourth job")
    self.assertEqual(jobs[3]["estimatedJobTime"], 600 * 12)
    self.assertEqual(jobs[3]["estimatedDiskUsage"], 600 * 400)
    self.assertEqual(jobs[4]["mask"].getRunAndLumis(), {2: [[5, 5]]},
                     "Wrong mask for the fifth job")
    self.assertEqual(jobs[4]["estimatedJobTime"], 600 * 12)
    self.assertEqual(jobs[4]["estimatedDiskUsage"], 600 * 400)
    self.assertEqual(jobs[5]["mask"].getRunAndLumis(),
                     {3: [[3, 3]], 4: [[4, 4]], 5: [[5, 5]]},
                     "Wrong mask for the sixth job")
    self.assertEqual(jobs[5]["estimatedJobTime"], 140 * 12)
    self.assertEqual(jobs[5]["estimatedDiskUsage"], 140 * 400)
    self.assertEqual(jobs[6]["mask"].getRunAndLumis(), {6: [[18, 20]]},
                     "Wrong mask for the seventh job")
    self.assertEqual(jobs[6]["estimatedJobTime"], 150 * 12)
    self.assertEqual(jobs[6]["estimatedDiskUsage"], 150 * 400)
    for job in jobs:
        self.assertEqual(job["estimatedMemoryUsage"], 2300)

    # Test interactions of this algorithm with splitOnRun = True.
    # Make 2 files, one with 3 runs and a second one with the last run of
    # the first.
    fileA = File(lfn="/this/is/file1a", size=1000, events=2400)
    # Lumis 1..8, shared by all three runs of fileA. (The original code
    # also built two identical, never-used lists; dead code removed.)
    lumiList = [lumi + 1 for lumi in range(8)]
    fileA.addRun(Run(1, *lumiList))
    fileA.addRun(Run(2, *lumiList))
    fileA.addRun(Run(3, *lumiList))
    fileA.setLocation("T1_US_FNAL_Disk")
    fileB = self.createFile('/this/is/file2a', 200, 3, 5, "T1_US_FNAL_Disk")

    testFileset = Fileset(name='FilesetB')
    testFileset.create()
    testFileset.addFile(fileA)
    testFileset.addFile(fileB)
    testFileset.commit()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()
    jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription)

    # The settings for this splitting are 700 events per job.
    jobGroups = jobFactory(splitOnRun=True, halt_job_on_file_boundaries=False,
                           events_per_job=700, performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
    self.assertEqual(jobs[0]["estimatedJobTime"], 700 * 12)
    self.assertEqual(jobs[0]["estimatedDiskUsage"], 700 * 400)
    self.assertEqual(jobs[1]["estimatedJobTime"], 100 * 12)
    self.assertEqual(jobs[1]["estimatedDiskUsage"], 100 * 400)
    self.assertEqual(jobs[2]["estimatedJobTime"], 700 * 12)
    self.assertEqual(jobs[2]["estimatedDiskUsage"], 700 * 400)
    self.assertEqual(jobs[3]["estimatedJobTime"], 100 * 12)
    self.assertEqual(jobs[3]["estimatedDiskUsage"], 100 * 400)
    self.assertEqual(jobs[4]["estimatedJobTime"], 700 * 12)
    self.assertEqual(jobs[4]["estimatedDiskUsage"], 700 * 400)
    self.assertEqual(jobs[5]["estimatedJobTime"], 300 * 12)
    self.assertEqual(jobs[5]["estimatedDiskUsage"], 300 * 400)
def testPromptReco(self):
    """
    _testPromptReco_

    Create a Prompt Reconstruction workflow and verify it installs into
    WMBS correctly: the Reco/AlcaSkim/DQM-harvest workflows, their output
    filesets, and the processing/merge/cleanup/logcollect subscriptions.

    Fix over the original: the two merge-LogCollect verification loops
    used to assert on a stale `logCollectSub` variable instead of the
    subscription they had just loaded, so those loops verified nothing.
    """
    testArguments = PromptRecoWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["EnableHarvesting"] = True

    factory = PromptRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # --- Reco workflow: one output per write tier plus logArchive. ---
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # ALCARECO is consumed by the AlcaSkim task, not by a Reco merge task.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- AlcaSkim workflow: one output per ALCA skim plus logArchive. ---
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- DQM harvest workflow (only logArchive outputs). ---
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- Merge workflows for the Reco outputs. ---
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Merge workflows for the AlcaSkim outputs. ---
    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Top-level processing subscription. ---
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # --- AlcaSkim subscription on the unmerged ALCARECO output. ---
    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # --- DQM harvesting subscription on the merged DQM output. ---
    mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # --- Merge subscriptions on the unmerged Reco outputs. ---
    unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
    for unmergedOutput in unmergedOutputs:
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # --- Merge subscriptions on the unmerged AlcaSkim outputs. ---
    unmergedOutputs = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # --- Cleanup subscriptions on the unmerged Reco outputs. ---
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # --- Cleanup subscriptions on the unmerged AlcaSkim outputs. ---
    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # --- LogCollect subscription for the Reco task. ---
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # --- LogCollect subscription for the AlcaSkim task. ---
    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # --- LogCollect subscriptions for the Reco merge tasks. ---
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                                                   goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoMergeLogCollect,
                                     workflow=recoMergeLogCollectWorkflow)
        logCollectSub.loadData()
        # Bug fix: assert on the subscription loaded in THIS iteration
        # (the original checked a stale variable from a previous section).
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # --- LogCollect subscriptions for the AlcaSkim merge tasks. ---
    goldenOutputMods = ["ALCARECOStream%s" % alcaProd for alcaProd in testArguments["AlcaSkims"]]
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                                                  goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                     workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()
        # Bug fix: same stale-variable problem as the Reco merge loop above.
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # --- LogCollect subscription for the DQM harvest task. ---
    dqmHarvestLogCollect = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testOutput(self):
    """
    _testOutput_

    Create a workflow, attach output filesets to it, and verify that the
    output map is stored to and loaded back from the database correctly.
    """
    # Three (unmerged, merged) fileset pairs; created in the same order
    # as they will be attached to the workflow.
    plainFilesets = [Fileset(name="testFilesetA"),
                     Fileset(name="testFilesetB"),
                     Fileset(name="testFilesetC")]
    mergedFilesets = [Fileset(name="testMergedFilesetA"),
                      Fileset(name="testMergedFilesetB"),
                      Fileset(name="testMergedFilesetC")]
    testFilesetA, testFilesetB, testFilesetC = plainFilesets
    testMergedFilesetA, testMergedFilesetB, testMergedFilesetC = mergedFilesets
    for fset in plainFilesets + mergedFilesets:
        fset.create()

    testWorkflowA = Workflow(spec="spec.xml", owner="Simon", name="wf001", task='Test')
    testWorkflowA.create()

    # Before any outputs are attached, a freshly loaded copy must have an
    # empty output map.
    testWorkflowB = Workflow(name="wf001", task='Test')
    testWorkflowB.load()
    self.assertEqual(len(testWorkflowB.outputMap), 0,
                     "ERROR: Output map exists before output is assigned")

    # Two entries under "outModOne", one under "outModTwo".
    testWorkflowA.addOutput("outModOne", testFilesetA, testMergedFilesetA)
    testWorkflowA.addOutput("outModOne", testFilesetC, testMergedFilesetC)
    testWorkflowA.addOutput("outModTwo", testFilesetB, testMergedFilesetB)

    # Reload and verify the persisted map.
    testWorkflowC = Workflow(name="wf001", task='Test')
    testWorkflowC.load()

    self.assertEqual(len(testWorkflowC.outputMap), 2,
                     "ERROR: Incorrect number of outputs in output map")
    self.assertTrue("outModOne" in testWorkflowC.outputMap.keys(),
                    "ERROR: Output modules missing from workflow output map")
    self.assertTrue("outModTwo" in testWorkflowC.outputMap.keys(),
                    "ERROR: Output modules missing from workflow output map")

    # Each outModOne entry must pair its unmerged fileset with the matching
    # merged fileset (A with mergedA, C with mergedC).
    for outputMap in testWorkflowC.outputMap["outModOne"]:
        if outputMap["output_fileset"].id == testFilesetA.id:
            self.assertEqual(outputMap["merged_output_fileset"].id, testMergedFilesetA.id,
                             "Error: Output map incorrectly maps filesets.")
        else:
            self.assertEqual(outputMap["merged_output_fileset"].id, testMergedFilesetC.id,
                             "Error: Output map incorrectly maps filesets.")
            self.assertEqual(outputMap["output_fileset"].id, testFilesetC.id,
                             "Error: Output map incorrectly maps filesets.")

    outModTwoMap = testWorkflowC.outputMap["outModTwo"][0]
    self.assertEqual(outModTwoMap["merged_output_fileset"].id, testMergedFilesetB.id,
                     "Error: Output map incorrectly maps filesets.")
    self.assertEqual(outModTwoMap["output_fileset"].id, testFilesetB.id,
                     "Error: Output map incorrectly maps filesets.")
    return
def testGetFinishedWorkflows(self):
    """
    _testGetFinishedWorkflows_

    Test that we get only those workflows which are finished, that is,
    workflows where all its subscriptions are finished and all other
    workflows with the same spec are finished too.
    """
    owner = "no-one"

    # 100 workflows sharing 10 spec/name combinations, 10 tasks each.
    workflows = []
    for taskIdx in range(100):
        bucket = taskIdx % 10
        flow = Workflow(spec="sp00%i" % bucket, owner=owner,
                        name="wf00%i" % bucket, task="task%i" % taskIdx)
        flow.create()
        workflows.append(flow)

    # A single shared fileset for every subscription.
    sharedFileset = Fileset(name="TestFileset")
    sharedFileset.create()

    # One subscription per workflow, all on the shared fileset.
    subscriptions = []
    for flow in workflows:
        sub = Subscription(fileset=sharedFileset, workflow=flow)
        sub.create()
        subscriptions.append(sub)

    # Nothing is finished yet, so the DAO must return nothing.
    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger,
                            dbinterface=myThread.dbi)
    getFinishedDAO = daoFactory(classname="Workflow.GetFinishedWorkflows")
    result = getFinishedDAO.execute()
    self.assertEqual(len(result), 0,
                     "A workflow is incorrectly flagged as finished: %s" % str(result))

    # Finish the first 50 subscriptions; every spec still has unfinished
    # tasks, so still nothing qualifies.
    for sub in subscriptions[:50]:
        sub.markFinished()
    result = getFinishedDAO.execute()
    self.assertEqual(len(result), 0,
                     "A workflow is incorrectly flagged as finished: %s" % str(result))

    # Finish the remaining subscriptions for specs 0-5 only (indexes 50-99
    # whose bucket is <= 5), completing workflows wf000-wf005.
    for subIdx in range(50, 100):
        if subIdx % 10 <= 5:
            subscriptions[subIdx].markFinished()

    result = getFinishedDAO.execute()
    self.assertEqual(len(result), 6,
                     "A workflow is incorrectly flagged as finished: %s" % str(result))

    # Sanity checks on the result structure; these rely on the naming
    # convention used above (wf00N <-> sp00N, 10 tasks per name).
    for wf in result:
        self.assertEqual(wf[2:], result[wf]['spec'][2:],
                         "A workflow has the wrong spec-name combination: %s" % str(wf))
        self.assertTrue(int(wf[2:]) < 6,
                        "A workflow is incorrectly flagged as finished: %s" % str(wf))
        self.assertEqual(len(result[wf]['workflows']), 10,
                         "A workflow has more tasks than it should: %s" % str(result[wf]))
        for task in result[wf]['workflows']:
            self.assertEqual(len(result[wf]['workflows'][task]), 1,
                             "A workflow has more subscriptions than it should: %s" % str(result[wf]))
    return
def testGetOutputMapDAO(self):
    """
    _testGetOutputMapDAO_

    Verify the proper behavior of the GetOutputMapDAO for a variety of
    different processing chains.

    Builds one processing workflow with three output modules (each feeding
    both a merged fileset and a cleanup fileset), plus downstream merge /
    processing workflows, then checks the output map returned for a job of
    the primary subscription.
    """
    # Output filesets for the three output modules, plus their merged
    # counterparts and a shared cleanup fileset.
    recoOutputFileset = Fileset(name="RECO")
    recoOutputFileset.create()
    mergedRecoOutputFileset = Fileset(name="MergedRECO")
    mergedRecoOutputFileset.create()
    alcaOutputFileset = Fileset(name="ALCA")
    alcaOutputFileset.create()
    mergedAlcaOutputFileset = Fileset(name="MergedALCA")
    mergedAlcaOutputFileset.create()
    dqmOutputFileset = Fileset(name="DQM")
    dqmOutputFileset.create()
    mergedDqmOutputFileset = Fileset(name="MergedDQM")
    mergedDqmOutputFileset.create()
    cleanupFileset = Fileset(name="Cleanup")
    cleanupFileset.create()

    testWorkflow = Workflow(spec="wf001.xml", owner="Steve", name="TestWF",
                            task="None")
    testWorkflow.create()
    testWorkflow.addOutput("output", recoOutputFileset,
                           mergedRecoOutputFileset)
    testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset,
                           mergedAlcaOutputFileset)
    testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset)
    # Each output module also feeds the cleanup fileset with no merged target
    testWorkflow.addOutput("output", cleanupFileset)
    testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset)
    testWorkflow.addOutput("DQM", cleanupFileset)

    # Downstream workflows: a merge and a processing consumer of RECO,
    # and a child processing consumer of ALCA.
    testRecoMergeWorkflow = Workflow(spec="wf002.xml", owner="Steve",
                                     name="TestRecoMergeWF", task="None")
    testRecoMergeWorkflow.create()
    testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset,
                                    mergedRecoOutputFileset)

    testRecoProcWorkflow = Workflow(spec="wf004.xml", owner="Steve",
                                    name="TestRecoProcWF", task="None")
    testRecoProcWorkflow.create()

    testAlcaChildWorkflow = Workflow(spec="wf003.xml", owner="Steve",
                                     name="TestAlcaChildWF", task="None")
    testAlcaChildWorkflow.create()

    inputFile = File(lfn="/path/to/some/lfn", size=600000, events=60000,
                     locations="cmssrm.fnal.gov")
    inputFile.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testFileset.addFile(inputFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="EventBased",
                                    type="Processing")
    testMergeRecoSubscription = Subscription(fileset=recoOutputFileset,
                                             workflow=testRecoMergeWorkflow,
                                             split_algo="WMBSMergeBySize",
                                             type="Merge")
    testProcRecoSubscription = Subscription(fileset=recoOutputFileset,
                                            workflow=testRecoProcWorkflow,
                                            split_algo="FileBased",
                                            type="Processing")
    testChildAlcaSubscription = Subscription(fileset=alcaOutputFileset,
                                             workflow=testAlcaChildWorkflow,
                                             split_algo="FileBased",
                                             type="Processing")
    testSubscription.create()
    testMergeRecoSubscription.create()
    testProcRecoSubscription.create()
    testChildAlcaSubscription.create()
    testSubscription.acquireFiles()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job(name="SplitJobA", files=[inputFile])
    testJob.create(group=testJobGroup)
    testJob["state"] = "complete"
    testJob.save()

    outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap")
    outputMap = outputMapAction.execute(jobID=testJob["id"])

    # One entry per output module of the primary workflow.
    # Use unittest assertions instead of bare asserts so the checks survive
    # running under "python -O" and give better failure reports.
    self.assertEqual(len(outputMap), 3,
                     "Error: Wrong number of outputs for primary workflow.")

    goldenMap = {"output": (recoOutputFileset.id,
                            mergedRecoOutputFileset.id),
                 "ALCARECOStreamCombined": (alcaOutputFileset.id,
                                            mergedAlcaOutputFileset.id),
                 "DQM": (dqmOutputFileset.id,
                         mergedDqmOutputFileset.id)}

    for outputID in outputMap.keys():
        for outputFilesets in outputMap[outputID]:
            if outputFilesets["merged_output_fileset"] is None:
                # Entries without a merged fileset must point at cleanup
                self.assertEqual(outputFilesets["output_fileset"],
                                 cleanupFileset.id,
                                 "Error: Cleanup fileset is wrong.")
                continue

            self.assertIn(outputID, goldenMap,
                          "Error: Output identifier is missing.")
            self.assertEqual(outputFilesets["output_fileset"],
                             goldenMap[outputID][0],
                             "Error: Output fileset is wrong.")
            self.assertEqual(outputFilesets["merged_output_fileset"],
                             goldenMap[outputID][1],
                             "Error: Merged output fileset is wrong.")
            del goldenMap[outputID]

    self.assertEqual(len(goldenMap), 0,
                     "Error: Missing output maps.")

    return
def testCompleteJobInput(self):
    """
    _testCompleteJobInput_

    Verify the correct output of the CompleteInput DAO.  This should mark
    the input for a job as complete once all the jobs that run over a
    particular file have complete successfully.

    A second ("bogus") workflow/subscription over the same files verifies
    that completing input for one subscription does not leak into another.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve",
                             name="wf002", task="Test")
    testWorkflow.create()
    bogusWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    bogusFileset = Fileset(name="BogusFileset")
    testFileset.create()
    bogusFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    bogusSubscription = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow)
    testSubscription.create()
    bogusSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="setest.site.ch")
    testFileB = File(lfn=makeUUID(), locations="setest.site.ch")
    testFileC = File(lfn=makeUUID(), locations="setest.site.ch")
    testFileA.create()
    testFileB.create()
    testFileC.create()

    testFileset.addFile([testFileA, testFileB, testFileC])
    bogusFileset.addFile([testFileA, testFileB, testFileC])
    testFileset.commit()
    bogusFileset.commit()

    testSubscription.acquireFiles([testFileA, testFileB, testFileC])
    bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

    testJobGroup = JobGroup(subscription=testSubscription)
    bogusJobGroup = JobGroup(subscription=bogusSubscription)
    testJobGroup.create()
    bogusJobGroup.create()

    # Jobs A and B share file A, so file A is only complete once both succeed
    testJobA = Job(name="TestJobA", files=[testFileA])
    testJobB = Job(name="TestJobB", files=[testFileA, testFileB])
    testJobC = Job(name="TestJobC", files=[testFileC])
    bogusJob = Job(name="BogusJob", files=[testFileA, testFileB, testFileC])
    testJobA.create(group=testJobGroup)
    testJobB.create(group=testJobGroup)
    testJobC.create(group=testJobGroup)
    bogusJob.create(group=bogusJobGroup)

    testJobA["outcome"] = "success"
    testJobB["outcome"] = "failure"
    testJobC["outcome"] = "success"
    testJobA.save()
    testJobB.save()
    testJobC.save()

    testJobA.completeInputFiles()

    # Job B (sharing file A) has failed, so nothing can be complete yet.
    # Use unittest assertions instead of bare asserts so the checks survive
    # running under "python -O" and give better failure reports.
    compFiles = len(testSubscription.filesOfStatus("Completed"))
    self.assertEqual(compFiles, 0,
                     "Error: test sub has wrong number of complete files: %s" % compFiles)

    testJobB["outcome"] = "success"
    testJobB.save()

    # Complete job B's input but skip file B, which should end up Failed
    testJobB.completeInputFiles(skipFiles=[testFileB["lfn"]])

    availFiles = len(testSubscription.filesOfStatus("Available"))
    self.assertEqual(availFiles, 0,
                     "Error: test sub has wrong number of available files: %s" % availFiles)
    # File C stays acquired: job C succeeded but never completed its input
    acqFiles = len(testSubscription.filesOfStatus("Acquired"))
    self.assertEqual(acqFiles, 1,
                     "Error: test sub has wrong number of acquired files: %s" % acqFiles)
    compFiles = len(testSubscription.filesOfStatus("Completed"))
    self.assertEqual(compFiles, 1,
                     "Error: test sub has wrong number of complete files: %s" % compFiles)
    failFiles = len(testSubscription.filesOfStatus("Failed"))
    self.assertEqual(failFiles, 1,
                     "Error: test sub has wrong number of failed files: %s" % failFiles)

    # The bogus subscription must be untouched: all 3 files still acquired
    availFiles = len(bogusSubscription.filesOfStatus("Available"))
    self.assertEqual(availFiles, 0,
                     "Error: test sub has wrong number of available files: %s" % availFiles)
    acqFiles = len(bogusSubscription.filesOfStatus("Acquired"))
    self.assertEqual(acqFiles, 3,
                     "Error: test sub has wrong number of acquired files: %s" % acqFiles)
    compFiles = len(bogusSubscription.filesOfStatus("Completed"))
    self.assertEqual(compFiles, 0,
                     "Error: test sub has wrong number of complete files: %s" % compFiles)
    failFiles = len(bogusSubscription.filesOfStatus("Failed"))
    self.assertEqual(failFiles, 0,
                     "Error: test sub has wrong number of failed files: %s" % failFiles)

    return
def testFailJobInput(self):
    """
    _testFailJobInput_

    Test the Jobs.FailInput DAO and verify that it doesn't affect other
    jobs/subscriptions that run over the same files.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve",
                             name="wf002", task="Test")
    testWorkflow.create()
    bogusWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    bogusFileset = Fileset(name="BogusFileset")
    testFileset.create()
    bogusFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    bogusSubscription = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow)
    testSubscription.create()
    bogusSubscription.create()

    # Three input files, shared by both filesets/subscriptions
    inputFiles = []
    for _ in range(3):
        newFile = File(lfn=makeUUID(), locations="setest.site.ch")
        newFile.create()
        inputFiles.append(newFile)

    testFileset.addFile(inputFiles)
    bogusFileset.addFile(inputFiles)
    testFileset.commit()
    bogusFileset.commit()

    # Test subscription starts with everything Completed, bogus with
    # everything Acquired
    testSubscription.completeFiles(inputFiles)
    bogusSubscription.acquireFiles(inputFiles)

    testJobGroup = JobGroup(subscription=testSubscription)
    bogusJobGroup = JobGroup(subscription=bogusSubscription)
    testJobGroup.create()
    bogusJobGroup.create()

    testJobA = Job(name="TestJobA", files=list(inputFiles))
    testJobB = Job(name="TestJobB", files=list(inputFiles))
    bogusJob = Job(name="BogusJob", files=list(inputFiles))
    testJobA.create(group=testJobGroup)
    testJobB.create(group=testJobGroup)
    bogusJob.create(group=bogusJobGroup)

    testJobA.failInputFiles()
    testJobB.failInputFiles()

    # All three files should now be Failed in the test subscription
    expectedCounts = {"Available": 0, "Acquired": 0,
                      "Failed": 3, "Completed": 0}
    for status in ("Available", "Acquired", "Failed", "Completed"):
        self.assertEqual(len(testSubscription.filesOfStatus(status)),
                         expectedCounts[status])

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    testJobB["state"] = "cleanout"
    changeStateAction.execute([testJobB])

    # Try again
    testJobA.failInputFiles()

    # Should now be failed -- the counts must be unchanged
    for status in ("Available", "Acquired", "Failed", "Completed"):
        self.assertEqual(len(testSubscription.filesOfStatus(status)),
                         expectedCounts[status])

    # bogus should be unchanged: its files are still all acquired
    bogusCounts = {"Available": 0, "Acquired": 3,
                   "Failed": 0, "Completed": 0}
    for status in ("Available", "Acquired", "Failed", "Completed"):
        self.assertEqual(len(bogusSubscription.filesOfStatus(status)),
                         bogusCounts[status])

    return
def createGiantJobSet(self, name, config, nSubs=10, nJobs=10, nFiles=1, spec="spec.xml"): """ Creates a massive set of jobs """ jobList = [] for i in range(0, nSubs): # Make a bunch of subscriptions localName = '%s-%i' % (name, i) testWorkflow = Workflow(spec=spec, owner=self.OWNERDN, name=localName, task="Test", owner_vogroup="", owner_vorole="") testWorkflow.create() testWMBSFileset = Fileset(name=localName) testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() filesToComplete = [] for j in range(0, nJobs): # Create jobs for each subscription testFileA = File(lfn="%s-%i-lfnA" % (localName, j), size=1024, events=10) testFileA.addRun( Run( 10, *[ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40 ])) testFileA.setLocation('malpaquet') testFileA.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.commit() filesToComplete.append(testFileA) testJob = Job(name='%s-%i' % (localName, j)) testJob.addFile(testFileA) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJobGroup.add(testJob) jobList.append(testJob) for k in range(0, nFiles): # Create output files testFile = File(lfn="%s-%i-output" % (localName, k), size=1024, events=10) testFile.addRun(Run(10, *[12312])) testFile.setLocation('malpaquet') testFile.create() testJobGroup.output.addFile(testFile) testJobGroup.output.commit() testJobGroup.commit() changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'success', 'complete') changer.propagate(testJobGroup.jobs, 'cleanout', 'success') testWMBSFileset.markOpen(0) testSubscription.completeFiles(filesToComplete) return jobList
def testListRunningJobs(self): """ _testListRunningJobs_ Test the ListRunningJobs DAO. """ testWorkflow = Workflow(spec=makeUUID(), owner="Steve", name=makeUUID(), task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type="Processing") testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJobA = Job(name=makeUUID(), files=[]) testJobA["couch_record"] = makeUUID() testJobA.create(group=testJobGroup) testJobA["state"] = "executing" testJobB = Job(name=makeUUID(), files=[]) testJobB["couch_record"] = makeUUID() testJobB.create(group=testJobGroup) testJobB["state"] = "complete" testJobC = Job(name=makeUUID(), files=[]) testJobC["couch_record"] = makeUUID() testJobC.create(group=testJobGroup) testJobC["state"] = "new" changeStateAction = self.daoFactory(classname="Jobs.ChangeState") changeStateAction.execute(jobs=[testJobA, testJobB, testJobC]) runningJobsAction = self.daoFactory( classname="Monitoring.ListRunningJobs") runningJobs = runningJobsAction.execute() assert len(runningJobs) == 2, \ "Error: Wrong number of running jobs returned." for runningJob in runningJobs: if runningJob["job_name"] == testJobA["name"]: assert runningJob["state"] == testJobA["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobA["couch_record"], \ "Error: Running job has wrong couch record." else: assert runningJob["job_name"] == testJobC["name"], \ "Error: Running job has wrong name." assert runningJob["state"] == testJobC["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobC["couch_record"], \ "Error: Running job has wrong couch record." return
def testA_BasicFunctionTest(self):
    """
    _BasicFunctionTest_

    Tests the components, by seeing if they can process a simple set of closeouts
    """
    myThread = threading.currentThread()

    config = self.getConfig()
    workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
    workload = self.createWorkload(workloadName=workloadPath)
    testJobGroup = self.createTestJobGroup(config=config,
                                           name=workload.name(),
                                           specLocation=workloadPath,
                                           error=False)

    # Create second workload
    testJobGroup2 = self.createTestJobGroup(
        config=config,
        name=workload.name(),
        filesetName="TestFileset_2",
        specLocation=workloadPath,
        task="/TestWorkload/ReReco/LogCollect",
        type="LogCollect")

    # Job cache directories must exist before the poller runs, and must be
    # gone afterwards (checked at the end of the test)
    cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                             "ReReco")
    os.makedirs(cachePath)
    self.assertTrue(os.path.exists(cachePath))

    cachePath2 = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                              "LogCollect")
    os.makedirs(cachePath2)
    self.assertTrue(os.path.exists(cachePath2))

    result = myThread.dbi.processData(
        "SELECT * FROM wmbs_subscription")[0].fetchall()
    self.assertEqual(len(result), 2)

    workflowName = "TestWorkload"
    dbname = config.TaskArchiver.workloadSummaryCouchDBName
    couchdb = CouchServer(config.JobStateMachine.couchurl)
    workdatabase = couchdb.connectDatabase(dbname)
    jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
    fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
    jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                          options={"startkey": [workflowName],
                                   "endkey": [workflowName, {}]})['rows']
    # NOTE(review): the fwjr view result is discarded -- presumably this call
    # only checks the view is queryable; confirm before removing it
    fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                    options={"startkey": [workflowName],
                             "endkey": [workflowName, {}]})['rows']

    self.assertEqual(len(jobs), 2 * self.nJobs)

    from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
    create = CreateWMBSBase()
    tables = []
    # NOTE(review): 'tables' is built but never used below -- possibly a
    # leftover from an earlier version of the test
    for x in create.requiredTables:
        tables.append(x[2:])

    self.populateWorkflowWithCompleteStatus()
    testTaskArchiver = TaskArchiverPoller(config=config)
    testTaskArchiver.algorithm()

    cleanCouch = CleanCouchPoller(config=config)
    cleanCouch.setup()
    cleanCouch.algorithm()

    # After archiving + cleanup every WMBS table touched must be empty
    result = myThread.dbi.processData(
        "SELECT * FROM wmbs_job")[0].fetchall()
    self.assertEqual(len(result), 0)
    result = myThread.dbi.processData(
        "SELECT * FROM wmbs_subscription")[0].fetchall()
    self.assertEqual(len(result), 0)
    result = myThread.dbi.processData(
        "SELECT * FROM wmbs_jobgroup")[0].fetchall()
    self.assertEqual(len(result), 0)
    result = myThread.dbi.processData(
        "SELECT * FROM wmbs_fileset")[0].fetchall()
    self.assertEqual(len(result), 0)
    result = myThread.dbi.processData(
        "SELECT * FROM wmbs_file_details")[0].fetchall()
    self.assertEqual(len(result), 0)

    # Make sure we deleted the directory
    self.assertFalse(os.path.exists(cachePath))
    self.assertFalse(
        os.path.exists(
            os.path.join(self.testDir, 'workloadTest/TestWorkload')))

    testWMBSFileset = Fileset(id=1)
    self.assertEqual(testWMBSFileset.exists(), False)

    workloadSummary = workdatabase.document(id="TestWorkload")

    # Check ACDC
    self.assertEqual(workloadSummary['ACDCServer'],
                     sanitizeURL(config.ACDC.couchurl)['url'])

    # Check the output
    # NOTE(review): comparing .keys() against a list works on Python 2 only;
    # on Python 3 this would need list()/sorted() around keys()
    self.assertEqual(workloadSummary['output'].keys(),
                     ['/Electron/MorePenguins-v0/RECO'])
    self.assertEqual(
        sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']
               ['tasks']),
        ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])

    # Check performance
    # Check histograms
    self.assertAlmostEquals(
        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
        ['AvgEventTime']['histogram'][0]['average'],
        0.89405199999999996,
        places=2)
    self.assertEqual(
        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
        ['AvgEventTime']['histogram'][0]['nEvents'], 10)

    # Check standard performance
    self.assertAlmostEquals(
        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
        ['TotalJobCPU']['average'],
        17.786300000000001,
        places=2)
    self.assertAlmostEquals(
        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
        ['TotalJobCPU']['stdDev'],
        0.0,
        places=2)

    # Check worstOffenders
    self.assertEqual(
        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
        ['AvgEventTime']['worstOffenders'], [{
            'logCollect': None,
            'log': None,
            'value': '0.894052',
            'jobID': 1
        }, {
            'logCollect': None,
            'log': None,
            'value': '0.894052',
            'jobID': 1
        }, {
            'logCollect': None,
            'log': None,
            'value': '0.894052',
            'jobID': 2
        }])

    # Check retryData
    self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'],
                     {'1': 10})
    logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
    self.assertEqual(workloadSummary['logArchives'], {
        '/TestWorkload/ReReco/LogCollect':
        [logCollectPFN for _ in range(10)]
    })

    # LogCollect task is made out of identical FWJRs
    # assert that it is identical
    for x in workloadSummary['performance'][
            '/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
        if x in config.TaskArchiver.histogramKeys:
            continue
        for y in ['average', 'stdDev']:
            self.assertAlmostEquals(
                workloadSummary['performance']
                ['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                workloadSummary['performance']['/TestWorkload/ReReco']
                ['cmsRun1'][x][y],
                places=2)

    return
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy): """ _configureRunStream_ Called by Tier0Feeder for new run/streams. Retrieve global run settings and build the part of the configuration relevant to run/stream and write it to the database. Create workflows, filesets and subscriptions for the processing of runs/streams. """ logging.debug("configureRunStream() : %d , %s" % (run, stream)) myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) # retrieve some basic run information getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo") runInfo = getRunInfoDAO.execute(run, transaction=False)[0] # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs if runInfo['hltkey'] != None: # streams not explicitely configured are repacked if stream not in tier0Config.Streams.dictionary_().keys(): addRepackConfig(tier0Config, stream) streamConfig = tier0Config.Streams.dictionary_()[stream] # consistency check to make sure stream exists and has datasets defined # only run if we don't ignore the stream if streamConfig.ProcessingStyle != "Ignore": getStreamDatasetsDAO = daoFactory( classname="RunConfig.GetStreamDatasets") datasets = getStreamDatasetsDAO.execute(run, stream, transaction=False) if len(datasets) == 0: raise RuntimeError( "Stream is not defined in HLT menu or has no datasets !") # write stream/dataset mapping (for special express and error datasets) insertDatasetDAO = daoFactory( classname="RunConfig.InsertPrimaryDataset") insertStreamDatasetDAO = daoFactory( classname="RunConfig.InsertStreamDataset") # write stream configuration insertCMSSWVersionDAO = daoFactory( classname="RunConfig.InsertCMSSWVersion") insertStreamStyleDAO = daoFactory( classname="RunConfig.InsertStreamStyle") insertRepackConfigDAO = daoFactory( classname="RunConfig.InsertRepackConfig") insertPromptCalibrationDAO = daoFactory( classname="RunConfig.InsertPromptCalibration") 
insertExpressConfigDAO = daoFactory( classname="RunConfig.InsertExpressConfig") insertSpecialDatasetDAO = daoFactory( classname="RunConfig.InsertSpecialDataset") insertDatasetScenarioDAO = daoFactory( classname="RunConfig.InsertDatasetScenario") insertStreamFilesetDAO = daoFactory( classname="RunConfig.InsertStreamFileset") insertRecoReleaseConfigDAO = daoFactory( classname="RunConfig.InsertRecoReleaseConfig") insertWorkflowMonitoringDAO = daoFactory( classname="RunConfig.InsertWorkflowMonitoring") insertStorageNodeDAO = daoFactory( classname="RunConfig.InsertStorageNode") insertPhEDExConfigDAO = daoFactory( classname="RunConfig.InsertPhEDExConfig") bindsCMSSWVersion = [] bindsDataset = [] bindsStreamDataset = [] bindsStreamStyle = { 'RUN': run, 'STREAM': stream, 'STYLE': streamConfig.ProcessingStyle } bindsRepackConfig = {} bindsPromptCalibration = {} bindsExpressConfig = {} bindsSpecialDataset = {} bindsDatasetScenario = [] bindsStorageNode = [] bindsPhEDExConfig = [] # mark workflows as injected wmbsDaoFactory = DAOFactory(package="WMCore.WMBS", logger=logging, dbinterface=myThread.dbi) markWorkflowsInjectedDAO = wmbsDaoFactory( classname="Workflow.MarkInjectedWorkflows") # # for spec creation, details for all outputs # outputModuleDetails = [] # # special dataset for some express output # specialDataset = None # # for PromptReco delay settings # promptRecoDelay = {} promptRecoDelayOffset = {} # # for PhEDEx subscription settings # subscriptions = [] # some hardcoded PhEDEx defaults expressPhEDExInjectNode = "T2_CH_CERN" expressPhEDExSubscribeNode = "T2_CH_CERN" # # first take care of all stream settings # getStreamOnlineVersionDAO = daoFactory( classname="RunConfig.GetStreamOnlineVersion") onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction=False) if streamConfig.ProcessingStyle == "Bulk": streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get( onlineVersion, onlineVersion) bindsCMSSWVersion.append( {'VERSION': 
streamConfig.Repack.CMSSWVersion}) streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get( streamConfig.Repack.CMSSWVersion, tier0Config.Global.DefaultScramArch) bindsRepackConfig = { 'RUN': run, 'STREAM': stream, 'PROC_VER': streamConfig.Repack.ProcessingVersion, 'MAX_SIZE_SINGLE_LUMI': streamConfig.Repack.MaxSizeSingleLumi, 'MAX_SIZE_MULTI_LUMI': streamConfig.Repack.MaxSizeMultiLumi, 'MIN_SIZE': streamConfig.Repack.MinInputSize, 'MAX_SIZE': streamConfig.Repack.MaxInputSize, 'MAX_EDM_SIZE': streamConfig.Repack.MaxEdmSize, 'MAX_OVER_SIZE': streamConfig.Repack.MaxOverSize, 'MAX_EVENTS': streamConfig.Repack.MaxInputEvents, 'MAX_FILES': streamConfig.Repack.MaxInputFiles, 'BLOCK_DELAY': streamConfig.Repack.BlockCloseDelay, 'CMSSW': streamConfig.Repack.CMSSWVersion, 'SCRAM_ARCH': streamConfig.Repack.ScramArch } elif streamConfig.ProcessingStyle == "Express": specialDataset = "Stream%s" % stream bindsDataset.append({'PRIMDS': specialDataset}) bindsStreamDataset.append({ 'RUN': run, 'PRIMDS': specialDataset, 'STREAM': stream }) bindsSpecialDataset = {'STREAM': stream, 'PRIMDS': specialDataset} bindsDatasetScenario.append({ 'RUN': run, 'PRIMDS': specialDataset, 'SCENARIO': streamConfig.Express.Scenario }) if streamConfig.Express.WriteDQM: outputModuleDetails.append({ 'dataTier': tier0Config.Global.DQMDataTier, 'eventContent': tier0Config.Global.DQMDataTier, 'primaryDataset': specialDataset }) bindsStorageNode.append({'NODE': expressPhEDExSubscribeNode}) bindsPhEDExConfig.append({ 'RUN': run, 'PRIMDS': specialDataset, 'ARCHIVAL_NODE': None, 'TAPE_NODE': None, 'DISK_NODE': expressPhEDExSubscribeNode }) subscriptions.append({ 'custodialSites': [], 'nonCustodialSites': [expressPhEDExSubscribeNode], 'autoApproveSites': [expressPhEDExSubscribeNode], 'priority': "high", 'primaryDataset': specialDataset }) alcaSkim = None if len(streamConfig.Express.AlcaSkims) > 0: outputModuleDetails.append({ 'dataTier': "ALCARECO", 'eventContent': "ALCARECO", 'primaryDataset': 
specialDataset }) alcaSkim = ",".join(streamConfig.Express.AlcaSkims) numPromptCalibProd = 0 for producer in streamConfig.Express.AlcaSkims: if producer.startswith("PromptCalibProd"): numPromptCalibProd += 1 if numPromptCalibProd > 0: bindsPromptCalibration = { 'RUN': run, 'STREAM': stream, 'NUM_PRODUCER': numPromptCalibProd } dqmSeq = None if len(streamConfig.Express.DqmSequences) > 0: dqmSeq = ",".join(streamConfig.Express.DqmSequences) streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get( onlineVersion, onlineVersion) bindsCMSSWVersion.append( {'VERSION': streamConfig.Express.CMSSWVersion}) streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get( streamConfig.Express.CMSSWVersion, tier0Config.Global.DefaultScramArch) streamConfig.Express.RecoScramArch = None if streamConfig.Express.RecoCMSSWVersion != None: bindsCMSSWVersion.append( {'VERSION': streamConfig.Express.RecoCMSSWVersion}) streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get( streamConfig.Express.RecoCMSSWVersion, tier0Config.Global.DefaultScramArch) bindsExpressConfig = { 'RUN': run, 'STREAM': stream, 'PROC_VER': streamConfig.Express.ProcessingVersion, 'WRITE_TIERS': ",".join(streamConfig.Express.DataTiers), 'WRITE_DQM': streamConfig.Express.WriteDQM, 'GLOBAL_TAG': streamConfig.Express.GlobalTag, 'MAX_RATE': streamConfig.Express.MaxInputRate, 'MAX_EVENTS': streamConfig.Express.MaxInputEvents, 'MAX_SIZE': streamConfig.Express.MaxInputSize, 'MAX_FILES': streamConfig.Express.MaxInputFiles, 'MAX_LATENCY': streamConfig.Express.MaxLatency, 'DQM_INTERVAL': streamConfig.Express.PeriodicHarvestInterval, 'BLOCK_DELAY': streamConfig.Express.BlockCloseDelay, 'CMSSW': streamConfig.Express.CMSSWVersion, 'SCRAM_ARCH': streamConfig.Express.ScramArch, 'RECO_CMSSW': streamConfig.Express.RecoCMSSWVersion, 'RECO_SCRAM_ARCH': streamConfig.Express.RecoScramArch, 'MULTICORE': streamConfig.Express.Multicore, 'ALCA_SKIM': alcaSkim, 'DQM_SEQ': dqmSeq } # # then configure datasets 
# getStreamDatasetTriggersDAO = daoFactory( classname="RunConfig.GetStreamDatasetTriggers") datasetTriggers = getStreamDatasetTriggersDAO.execute( run, stream, transaction=False) for dataset, paths in datasetTriggers.items(): datasetConfig = retrieveDatasetConfig(tier0Config, dataset) selectEvents = [] for path in sorted(paths): selectEvents.append("%s:%s" % (path, runInfo['process'])) if streamConfig.ProcessingStyle == "Bulk": promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay promptRecoDelayOffset[ datasetConfig.Name] = datasetConfig.RecoDelayOffset outputModuleDetails.append({ 'dataTier': "RAW", 'eventContent': "ALL", 'selectEvents': selectEvents, 'primaryDataset': dataset }) bindsPhEDExConfig.append({ 'RUN': run, 'PRIMDS': dataset, 'ARCHIVAL_NODE': datasetConfig.ArchivalNode, 'TAPE_NODE': datasetConfig.TapeNode, 'DISK_NODE': datasetConfig.DiskNode }) custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: bindsStorageNode.append( {'NODE': datasetConfig.ArchivalNode}) custodialSites.append(datasetConfig.ArchivalNode) autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.TapeNode != None: bindsStorageNode.append({'NODE': datasetConfig.TapeNode}) custodialSites.append(datasetConfig.TapeNode) if datasetConfig.DiskNode != None: bindsStorageNode.append({'NODE': datasetConfig.DiskNode}) nonCustodialSites.append(datasetConfig.DiskNode) autoApproveSites.append(datasetConfig.DiskNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append({ 'custodialSites': custodialSites, 'custodialSubType': "Replica", 'nonCustodialSites': nonCustodialSites, 'autoApproveSites': autoApproveSites, 'priority': "high", 'primaryDataset': dataset, 'dataTier': "RAW" }) # # set subscriptions for error dataset # custodialSites = [] nonCustodialSites = [] autoApproveSites = [] if datasetConfig.ArchivalNode != None: custodialSites.append(datasetConfig.ArchivalNode) 
autoApproveSites.append(datasetConfig.ArchivalNode) if datasetConfig.ArchivalNode != expressPhEDExInjectNode: nonCustodialSites.append(expressPhEDExInjectNode) autoApproveSites.append(expressPhEDExInjectNode) if len(custodialSites) > 0 or len(nonCustodialSites) > 0: subscriptions.append({ 'custodialSites': custodialSites, 'custodialSubType': "Replica", 'nonCustodialSites': nonCustodialSites, 'autoApproveSites': autoApproveSites, 'priority': "high", 'primaryDataset': "%s-Error" % dataset, 'dataTier': "RAW" }) elif streamConfig.ProcessingStyle == "Express": for dataTier in streamConfig.Express.DataTiers: if dataTier not in ["ALCARECO", "DQM", "DQMIO"]: outputModuleDetails.append({ 'dataTier': dataTier, 'eventContent': dataTier, 'selectEvents': selectEvents, 'primaryDataset': dataset }) bindsPhEDExConfig.append({ 'RUN': run, 'PRIMDS': dataset, 'ARCHIVAL_NODE': None, 'TAPE_NODE': None, 'DISK_NODE': expressPhEDExSubscribeNode }) subscriptions.append({ 'custodialSites': [], 'nonCustodialSites': [expressPhEDExSubscribeNode], 'autoApproveSites': [expressPhEDExSubscribeNode], 'priority': "high", 'primaryDataset': dataset }) # # finally create WMSpec # outputs = {} if streamConfig.ProcessingStyle == "Bulk": taskName = "Repack" workflowName = "Repack_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['TimePerEvent'] = 1 specArguments['SizePerEvent'] = 200 specArguments['Memory'] = 1800 specArguments[ 'RequestPriority'] = tier0Config.Global.BaseRequestPriority + 5000 specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion specArguments['ScramArch'] = streamConfig.Repack.ScramArch specArguments[ 'ProcessingVersion'] = streamConfig.Repack.ProcessingVersion specArguments[ 'MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi specArguments[ 'MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize 
specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize specArguments[ 'MaxInputEvents'] = streamConfig.Repack.MaxInputEvents specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo[ 'bulk_data_type'] if runInfo['backfill']: specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % ( runInfo['backfill'], runInfo['bulk_data_type']) else: specArguments[ 'MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type'] specArguments[ 'BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay elif streamConfig.ProcessingStyle == "Express": taskName = "Express" workflowName = "Express_Run%d_Stream%s" % (run, stream) specArguments = {} specArguments['TimePerEvent'] = 12 specArguments['SizePerEvent'] = 512 specArguments['Memory'] = 1800 if streamConfig.Express.Multicore: specArguments['Multicore'] = streamConfig.Express.Multicore specArguments['Memory'] = 1800 * streamConfig.Express.Multicore specArguments[ 'RequestPriority'] = tier0Config.Global.BaseRequestPriority + 10000 specArguments['ProcessingString'] = "Express" specArguments[ 'ProcessingVersion'] = streamConfig.Express.ProcessingVersion specArguments['Scenario'] = streamConfig.Express.Scenario specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion specArguments['ScramArch'] = streamConfig.Express.ScramArch specArguments[ 'RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch specArguments['GlobalTag'] = streamConfig.Express.GlobalTag specArguments['GlobalTagTransaction'] = "Express_%d" % run specArguments[ 'GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate specArguments[ 'MaxInputEvents'] = streamConfig.Express.MaxInputEvents specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize specArguments['MaxInputFiles'] = 
streamConfig.Express.MaxInputFiles specArguments['MaxLatency'] = streamConfig.Express.MaxLatency specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims specArguments['DqmSequences'] = streamConfig.Express.DqmSequences specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout'] specArguments['AlcaHarvestDir'] = runInfo['ah_dir'] specArguments['DQMUploadProxy'] = dqmUploadProxy specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl'] specArguments['StreamName'] = stream specArguments['SpecialDataset'] = specialDataset specArguments['UnmergedLFNBase'] = "/store/unmerged/express" specArguments['MergedLFNBase'] = "/store/express" if runInfo['backfill']: specArguments[ 'MergedLFNBase'] = "/store/backfill/%s/express" % runInfo[ 'backfill'] else: specArguments['MergedLFNBase'] = "/store/express" specArguments[ 'PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval specArguments[ 'BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay if streamConfig.ProcessingStyle in ['Bulk', 'Express']: specArguments['RunNumber'] = run specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra specArguments['Outputs'] = outputModuleDetails specArguments[ 'OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override" specArguments['ValidStatus'] = "VALID" specArguments['SiteWhitelist'] = ["T2_CH_CERN_T0"] specArguments['SiteBlacklist'] = [] if streamConfig.ProcessingStyle == "Bulk": factory = RepackWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction( workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc']) for subscription in subscriptions: wmSpec.setSubscriptionInformation(**subscription) elif streamConfig.ProcessingStyle == "Express": factory = ExpressWorkloadFactory() wmSpec = factory.factoryWorkloadConstruction( workflowName, specArguments) wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode) for subscription in subscriptions: 
wmSpec.setSubscriptionInformation(**subscription) if streamConfig.ProcessingStyle in ['Bulk', 'Express']: wmSpec.setOwnerDetails( "*****@*****.**", "T0", { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT', 'dn': "*****@*****.**" }) wmSpec.setupPerformanceMonitoring(maxRSS=10485760, maxVSize=10485760, softTimeout=604800, gracePeriod=3600) wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory) filesetName = "Run%d_Stream%s" % (run, stream) fileset = Fileset(filesetName) # # create workflow (currently either repack or express) # try: myThread.transaction.begin() if len(bindsCMSSWVersion) > 0: insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn=myThread.transaction.conn, transaction=True) if len(bindsDataset) > 0: insertDatasetDAO.execute(bindsDataset, conn=myThread.transaction.conn, transaction=True) if len(bindsStreamDataset) > 0: insertStreamDatasetDAO.execute(bindsStreamDataset, conn=myThread.transaction.conn, transaction=True) if len(bindsRepackConfig) > 0: insertRepackConfigDAO.execute(bindsRepackConfig, conn=myThread.transaction.conn, transaction=True) if len(bindsPromptCalibration) > 0: insertPromptCalibrationDAO.execute( bindsPromptCalibration, conn=myThread.transaction.conn, transaction=True) if len(bindsExpressConfig) > 0: insertExpressConfigDAO.execute(bindsExpressConfig, conn=myThread.transaction.conn, transaction=True) if len(bindsSpecialDataset) > 0: insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn=myThread.transaction.conn, transaction=True) if len(bindsDatasetScenario) > 0: insertDatasetScenarioDAO.execute( bindsDatasetScenario, conn=myThread.transaction.conn, transaction=True) if len(bindsStorageNode) > 0: insertStorageNodeDAO.execute(bindsStorageNode, conn=myThread.transaction.conn, transaction=True) if len(bindsPhEDExConfig) > 0: insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn=myThread.transaction.conn, transaction=True) insertStreamStyleDAO.execute(bindsStreamStyle, conn=myThread.transaction.conn, transaction=True) if 
streamConfig.ProcessingStyle in ['Bulk', 'Express']: insertStreamFilesetDAO.execute(run, stream, filesetName, conn=myThread.transaction.conn, transaction=True) fileset.load() wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose=True) insertWorkflowMonitoringDAO.execute( [fileset.id], conn=myThread.transaction.conn, transaction=True) if streamConfig.ProcessingStyle == "Bulk": bindsRecoReleaseConfig = [] for fileset, primds in wmbsHelper.getMergeOutputMapping( ).items(): bindsRecoReleaseConfig.append({ 'RUN': run, 'PRIMDS': primds, 'FILESET': fileset, 'RECODELAY': promptRecoDelay[primds], 'RECODELAYOFFSET': promptRecoDelayOffset[primds] }) insertRecoReleaseConfigDAO.execute( bindsRecoReleaseConfig, conn=myThread.transaction.conn, transaction=True) elif streamConfig.ProcessingStyle == "Express": markWorkflowsInjectedDAO.execute( [workflowName], injected=True, conn=myThread.transaction.conn, transaction=True) except Exception as ex: logging.exception(ex) myThread.transaction.rollback() raise RuntimeError( "Problem in configureRunStream() database transaction !") else: myThread.transaction.commit() else: # should we do anything for local runs ? pass return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Verify that the subscription creation code works correctly.

    Installs a test workload (Processing -> Merge -> Skim, plus Cleanup)
    into WMBS via WMBSHelper and then checks, for every task:
      - the Workflow row (owner, group, type, spec path, output map)
      - the output filesets (merged vs. unmerged names)
      - the Subscription rows (site white/black list, type, split algo)
    """
    # Two sites so the white/black list logic below has something to split on.
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1',
                               pnn='goodse.cern.ch',
                               ceName='site1',
                               plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2',
                               pnn='goodse2.cern.ch',
                               ceName='site2',
                               plugin="TestPlugin")

    # Build the spec and inject it into WMBS (filesets, workflows, subs).
    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    testWMBSHelper = WMBSHelper(testWorkload,
                                testTopLevelTask.name(),
                                "SomeBlock",
                                cachepath=self.workDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(
        testTopLevelTask, testWMBSHelper.topLevelFileset)

    # --- Processing workflow ---------------------------------------------
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask")
    procWorkflow.load()

    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.")
    # Spec URL must point at the pickled workload inside the sandbox.
    self.assertEqual(
        procWorkflow.spec,
        os.path.join(self.workDir, procWorkflow.name, "WMSandbox",
                     "WMWorkload.pkl"), "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # OutputA feeds a merge task, so its "merged" fileset is the merge
    # task's output while the unmerged one belongs to the processing task.
    mergedProcOutput = procWorkflow.outputMap["OutputA"][0][
        "merged_output_fileset"]
    unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][
        "output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()

    self.assertEqual(
        mergedProcOutput.name,
        "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
        "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # --- Merge workflow --------------------------------------------------
    mergeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()

    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(
        mergeWorkflow.spec,
        os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                     "WMWorkload.pkl"), "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # --- Cleanup workflow (no outputs expected) --------------------------
    cleanupWorkflow = Workflow(
        name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()

    self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(
        cleanupWorkflow.spec,
        os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                     "WMWorkload.pkl"), "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    # A merge task's output is already merged, so its "unmerged" fileset
    # is the merged-Merged fileset itself.
    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
        "output_fileset"]
    unmergedMergeOutput.loadData()

    self.assertEqual(
        unmergedMergeOutput.name,
        "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
        "Error: Unmerged output fileset is wrong.")

    # --- Skim workflow ---------------------------------------------------
    skimWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()

    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(
        skimWorkflow.spec,
        os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                     "WMWorkload.pkl"), "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # Skim outputs have no downstream merge task, so merged and unmerged
    # filesets are expected to be the same "unmerged-*" fileset.
    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
        "merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
        "output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
        "merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
        "output_fileset"]
    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()

    self.assertEqual(
        mergedSkimOutputA.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(
        unmergedSkimOutputA.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Unmerged output fileset is wrong.")
    self.assertEqual(
        mergedSkimOutputB.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Merged output fileset is wrong.")
    self.assertEqual(
        unmergedSkimOutputB.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Unmerged output fileset is wrong.")

    # --- Subscriptions ---------------------------------------------------
    topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=procWorkflow)
    procSubscription.loadData()

    # Both sites appear in the list; site1 whitelisted, site2 blacklisted.
    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for site in procSubscription.getWhiteBlackList():
        if site["site_name"] == "site1":
            self.assertEqual(site["valid"], 1,
                             "Error: Site should be white listed.")
        else:
            self.assertEqual(site["valid"], 0,
                             "Error: Site should be black listed.")

    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                     workflow=mergeWorkflow)
    mergeSubscription.loadData()

    # Merge subscriptions inherit no site list from the spec here.
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")

    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy):
    """
    _releasePromptReco_

    Called by Tier0Feeder.

    Finds all run/primds that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay > now
      AND run.end_time > 0 )

    Create workflows and subscriptions for the processing
    of runs/datasets.

    :param tier0Config: Tier0 configuration object (Global settings plus
                        per-dataset configs retrieved via
                        retrieveDatasetConfig).
    :param specDirectory: cache path handed to WMBSHelper for the spec
                          sandbox.
    :param dqmUploadProxy: proxy passed through to the workload as
                           DQMUploadProxy.
    :raises RuntimeError: if the final database transaction fails (the
                          transaction is rolled back first).
    """
    logging.debug("releasePromptReco()")

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    insertDatasetScenarioDAO = daoFactory(
        classname="RunConfig.InsertDatasetScenario")
    insertCMSSWVersionDAO = daoFactory(
        classname="RunConfig.InsertCMSSWVersion")
    insertRecoConfigDAO = daoFactory(classname="RunConfig.InsertRecoConfig")
    insertStorageNodeDAO = daoFactory(classname="RunConfig.InsertStorageNode")
    # NOTE(review): insertPhEDExConfigDAO is instantiated but never executed
    # in this function, and bindsStorageNode is never filled — confirm
    # whether these are leftovers.
    insertPhEDExConfigDAO = daoFactory(
        classname="RunConfig.InsertPhEDExConfig")
    releasePromptRecoDAO = daoFactory(classname="RunConfig.ReleasePromptReco")
    insertWorkflowMonitoringDAO = daoFactory(
        classname="RunConfig.InsertWorkflowMonitoring")

    # bind lists for the batched inserts executed in the transaction below
    bindsDatasetScenario = []
    bindsCMSSWVersion = []
    bindsRecoConfig = []
    bindsStorageNode = []
    bindsReleasePromptReco = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
    markWorkflowsInjectedDAO = wmbsDaoFactory(
        classname="Workflow.MarkInjectedWorkflows")

    #
    # for creating PromptReco specs
    #
    recoSpecs = {}

    #
    # for PhEDEx subscription settings
    #
    subscriptions = []

    findRecoReleaseDAO = daoFactory(classname="RunConfig.FindRecoRelease")
    recoRelease = findRecoReleaseDAO.execute(transaction=False)

    for run in sorted(recoRelease.keys()):

        # retrieve some basic run information
        getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
        runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

        # retrieve phedex configs for run
        getPhEDExConfigDAO = daoFactory(classname="RunConfig.GetPhEDExConfig")
        phedexConfigs = getPhEDExConfigDAO.execute(run, transaction=False)

        for (dataset, fileset, repackProcVer) in recoRelease[run]:

            bindsReleasePromptReco.append({
                'RUN': run,
                'PRIMDS': dataset,
                'NOW': int(time.time())
            })

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            bindsDatasetScenario.append({
                'RUN': run,
                'PRIMDS': dataset,
                'SCENARIO': datasetConfig.Scenario
            })

            if datasetConfig.CMSSWVersion != None:
                bindsCMSSWVersion.append(
                    {'VERSION': datasetConfig.CMSSWVersion})

            # collapse list-valued config into comma-separated strings for
            # the database binds (None when empty)
            alcaSkim = None
            if len(datasetConfig.AlcaSkims) > 0:
                alcaSkim = ",".join(datasetConfig.AlcaSkims)

            dqmSeq = None
            if len(datasetConfig.DqmSequences) > 0:
                dqmSeq = ",".join(datasetConfig.DqmSequences)

            # pick the ScramArch matching the CMSSW release, with a global
            # default fallback
            datasetConfig.ScramArch = tier0Config.Global.ScramArches.get(
                datasetConfig.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            bindsRecoConfig.append({
                'RUN': run,
                'PRIMDS': dataset,
                'DO_RECO': int(datasetConfig.DoReco),
                'RECO_SPLIT': datasetConfig.RecoSplit,
                'WRITE_RECO': int(datasetConfig.WriteRECO),
                'WRITE_DQM': int(datasetConfig.WriteDQM),
                'WRITE_AOD': int(datasetConfig.WriteAOD),
                'PROC_VER': datasetConfig.ProcessingVersion,
                'ALCA_SKIM': alcaSkim,
                'DQM_SEQ': dqmSeq,
                'BLOCK_DELAY': datasetConfig.BlockCloseDelay,
                'CMSSW': datasetConfig.CMSSWVersion,
                'SCRAM_ARCH': datasetConfig.ScramArch,
                'MULTICORE': datasetConfig.Multicore,
                'GLOBAL_TAG': datasetConfig.GlobalTag
            })

            phedexConfig = phedexConfigs[dataset]

            # do things different based on whether we have TapeNode/DiskNode or ArchivalNode
            # NOTE(review): `subscriptions` is never reset inside this loop,
            # so entries accumulate across datasets/runs and every wmSpec
            # below receives the full accumulated list — confirm this is
            # intended.
            if phedexConfig['tape_node'] != None and phedexConfig[
                    'disk_node'] != None:

                # tape+disk: AOD goes to both, ALCARECO/DQM to tape only,
                # RECO to disk only
                if datasetConfig.WriteAOD:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['tape_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [phedexConfig['disk_node']],
                        'autoApproveSites': [phedexConfig['disk_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "AOD"
                    })

                if len(datasetConfig.AlcaSkims) > 0:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['tape_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "ALCARECO"
                    })

                if datasetConfig.WriteDQM:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['tape_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': tier0Config.Global.DQMDataTier
                    })

                if datasetConfig.WriteRECO:
                    subscriptions.append({
                        'custodialSites': [],
                        'nonCustodialSites': [phedexConfig['disk_node']],
                        'autoApproveSites': [phedexConfig['disk_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "RECO"
                    })

            elif phedexConfig['archival_node'] != None:

                # archival-only: every written tier goes custodially to the
                # archival node and is auto-approved there
                if datasetConfig.WriteAOD:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['archival_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [phedexConfig['archival_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "AOD"
                    })

                if len(datasetConfig.AlcaSkims) > 0:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['archival_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [phedexConfig['archival_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "ALCARECO"
                    })

                if datasetConfig.WriteDQM:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['archival_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [phedexConfig['archival_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': tier0Config.Global.DQMDataTier
                    })

                if datasetConfig.WriteRECO:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['archival_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [phedexConfig['archival_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "RECO"
                    })

            # list of output tiers the workload will actually produce
            writeTiers = []
            if datasetConfig.WriteRECO:
                writeTiers.append("RECO")
            if datasetConfig.WriteAOD:
                writeTiers.append("AOD")
            if datasetConfig.WriteDQM:
                writeTiers.append(tier0Config.Global.DQMDataTier)
            if len(datasetConfig.AlcaSkims) > 0:
                writeTiers.append("ALCARECO")

            if datasetConfig.DoReco and len(writeTiers) > 0:

                #
                # create WMSpec
                #
                taskName = "Reco"
                workflowName = "PromptReco_Run%d_%s" % (run, dataset)

                specArguments = {}

                specArguments['TimePerEvent'] = 12
                specArguments['SizePerEvent'] = 512
                specArguments['Memory'] = 1800

                if datasetConfig.Multicore:
                    specArguments['Multicore'] = datasetConfig.Multicore
                    # memory budget scales with the core count
                    specArguments['Memory'] = 1800 * datasetConfig.Multicore

                specArguments[
                    'RequestPriority'] = tier0Config.Global.BaseRequestPriority

                specArguments['AcquisitionEra'] = runInfo['acq_era']
                specArguments['CMSSWVersion'] = datasetConfig.CMSSWVersion
                specArguments['ScramArch'] = datasetConfig.ScramArch

                specArguments['RunNumber'] = run

                specArguments['SplittingAlgo'] = "EventAwareLumiBased"
                specArguments['EventsPerJob'] = datasetConfig.RecoSplit

                specArguments['ProcessingString'] = "PromptReco"
                specArguments[
                    'ProcessingVersion'] = datasetConfig.ProcessingVersion
                specArguments['Scenario'] = datasetConfig.Scenario

                specArguments['GlobalTag'] = datasetConfig.GlobalTag
                specArguments[
                    'GlobalTagConnect'] = datasetConfig.GlobalTagConnect

                # input is the matching RAW dataset produced by repack
                specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (
                    dataset, runInfo['acq_era'], repackProcVer)

                specArguments['WriteTiers'] = writeTiers
                specArguments['AlcaSkims'] = datasetConfig.AlcaSkims
                specArguments['DqmSequences'] = datasetConfig.DqmSequences

                specArguments[
                    'UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo[
                        'bulk_data_type']
                # backfill runs go to a separate LFN namespace
                if runInfo['backfill']:
                    specArguments[
                        'MergedLFNBase'] = "/store/backfill/%s/%s" % (
                            runInfo['backfill'], runInfo['bulk_data_type'])
                else:
                    specArguments['MergedLFNBase'] = "/store/%s" % runInfo[
                        'bulk_data_type']

                specArguments['ValidStatus'] = "VALID"

                specArguments['EnableHarvesting'] = "True"
                specArguments['DQMUploadProxy'] = dqmUploadProxy
                specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']

                specArguments[
                    'BlockCloseDelay'] = datasetConfig.BlockCloseDelay

                specArguments['SiteWhitelist'] = datasetConfig.SiteWhitelist
                specArguments['SiteBlacklist'] = []
                specArguments['TrustSitelists'] = "True"

                factory = PromptRecoWorkloadFactory()
                wmSpec = factory.factoryWorkloadConstruction(
                    workflowName, specArguments)

                wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
                for subscription in subscriptions:
                    wmSpec.setSubscriptionInformation(**subscription)

                wmSpec.setOwnerDetails(
                    "*****@*****.**", "T0", {
                        'vogroup': 'DEFAULT',
                        'vorole': 'DEFAULT',
                        'dn': "*****@*****.**"
                    })

                wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                                  maxVSize=10485760,
                                                  softTimeout=604800,
                                                  gracePeriod=3600)

                wmbsHelper = WMBSHelper(wmSpec,
                                        taskName,
                                        cachepath=specDirectory)

                recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset)

    # single transaction for all config inserts, subscription creation and
    # injection marking; rolled back as a whole on any failure
    try:
        myThread.transaction.begin()
        if len(bindsDatasetScenario) > 0:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
        if len(bindsCMSSWVersion) > 0:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                          conn=myThread.transaction.conn,
                                          transaction=True)
        if len(bindsRecoConfig) > 0:
            insertRecoConfigDAO.execute(bindsRecoConfig,
                                        conn=myThread.transaction.conn,
                                        transaction=True)
        if len(bindsStorageNode) > 0:
            insertStorageNodeDAO.execute(bindsStorageNode,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        if len(bindsReleasePromptReco) > 0:
            releasePromptRecoDAO.execute(bindsReleasePromptReco,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        for (wmbsHelper, wmSpec, fileset) in recoSpecs.values():
            wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                          Fileset(id=fileset),
                                          alternativeFilesetClose=True)
            insertWorkflowMonitoringDAO.execute(
                [fileset], conn=myThread.transaction.conn, transaction=True)
        if len(recoSpecs) > 0:
            markWorkflowsInjectedDAO.execute(recoSpecs.keys(),
                                             injected=True,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError(
            "Problem in releasePromptReco() database transaction !")
    else:
        myThread.transaction.commit()

    return
def setupForKillTest(self, baAPI=None):
    """
    _setupForKillTest_

    Inject a workflow into WMBS that has a processing task, a merge task and
    a cleanup task.  Inject files into the various tasks at various
    processing states (acquired, complete, available...).  Also create jobs
    for each subscription in various states.

    :param baAPI: optional BossAir-style API; when given, the created jobs
                  are also registered with it via createNewJobs().

    Side effects: stores the created subscriptions and jobs on self
    (mainProcSub, mainMergeSub, mainCleanupSub, bogusSub, procJob*/mergeJob*/
    cleanupJob*) for use by the kill tests.
    """
    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    dummyLocationAction = daoFactory(classname="Locations.New")
    changeStateAction = daoFactory(classname="Jobs.ChangeState")
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1',
                               pnn='goodse.cern.ch',
                               ceName='site1',
                               plugin="TestPlugin")
    resourceControl.insertThreshold(siteName='site1',
                                    taskType='Processing',
                                    maxSlots=10000,
                                    pendingSlots=10000)

    userDN = 'someDN'
    userAction = daoFactory(classname="Users.New")
    userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT')

    # input fileset: three files, all located at the test SE
    inputFileset = Fileset("input")
    inputFileset.create()

    inputFileA = File("lfnA", locations="goodse.cern.ch")
    inputFileB = File("lfnB", locations="goodse.cern.ch")
    inputFileC = File("lfnC", locations="goodse.cern.ch")
    inputFileA.create()
    inputFileB.create()
    inputFileC.create()

    inputFileset.addFile(inputFileA)
    inputFileset.addFile(inputFileB)
    inputFileset.addFile(inputFileC)
    inputFileset.commit()

    # unmerged output fileset shared by the merge and cleanup subscriptions
    unmergedOutputFileset = Fileset("unmerged")
    unmergedOutputFileset.create()

    unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
    unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
    unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
    unmergedFileA.create()
    unmergedFileB.create()
    unmergedFileC.create()

    unmergedOutputFileset.addFile(unmergedFileA)
    unmergedOutputFileset.addFile(unmergedFileB)
    unmergedOutputFileset.addFile(unmergedFileC)
    unmergedOutputFileset.commit()

    # three workflows sharing one spec/name, differing only in task
    mainProcWorkflow = Workflow(spec="spec1",
                                owner="Steve",
                                name="Main",
                                task="Proc")
    mainProcWorkflow.create()
    mainProcMergeWorkflow = Workflow(spec="spec1",
                                     owner="Steve",
                                     name="Main",
                                     task="ProcMerge")
    mainProcMergeWorkflow.create()
    mainCleanupWorkflow = Workflow(spec="spec1",
                                   owner="Steve",
                                   name="Main",
                                   task="Cleanup")
    mainCleanupWorkflow.create()

    # processing subscription: fileA acquired, fileB complete, fileC left
    # available
    self.mainProcSub = Subscription(fileset=inputFileset,
                                    workflow=mainProcWorkflow,
                                    type="Processing")
    self.mainProcSub.create()
    self.mainProcSub.acquireFiles(inputFileA)
    self.mainProcSub.completeFiles(inputFileB)

    procJobGroup = JobGroup(subscription=self.mainProcSub)
    procJobGroup.create()
    # jobs in new/executing/complete so the kill code has every state to hit
    self.procJobA = Job(name="ProcJobA")
    self.procJobA["state"] = "new"
    self.procJobA["location"] = "site1"
    self.procJobB = Job(name="ProcJobB")
    self.procJobB["state"] = "executing"
    self.procJobB["location"] = "site1"
    self.procJobC = Job(name="ProcJobC")
    self.procJobC["state"] = "complete"
    self.procJobC["location"] = "site1"
    self.procJobA.create(procJobGroup)
    self.procJobB.create(procJobGroup)
    self.procJobC.create(procJobGroup)

    # merge subscription: fileA acquired, fileB failed
    self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                     workflow=mainProcMergeWorkflow,
                                     type="Merge")
    self.mainMergeSub.create()
    self.mainMergeSub.acquireFiles(unmergedFileA)
    self.mainMergeSub.failFiles(unmergedFileB)

    mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
    mergeJobGroup.create()
    self.mergeJobA = Job(name="MergeJobA")
    self.mergeJobA["state"] = "exhausted"
    self.mergeJobA["location"] = "site1"
    self.mergeJobB = Job(name="MergeJobB")
    self.mergeJobB["state"] = "cleanout"
    self.mergeJobB["location"] = "site1"
    self.mergeJobC = Job(name="MergeJobC")
    self.mergeJobC["state"] = "new"
    self.mergeJobC["location"] = "site1"
    self.mergeJobA.create(mergeJobGroup)
    self.mergeJobB.create(mergeJobGroup)
    self.mergeJobC.create(mergeJobGroup)

    # cleanup subscription on the same unmerged fileset
    self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                       workflow=mainCleanupWorkflow,
                                       type="Cleanup")
    self.mainCleanupSub.create()
    self.mainCleanupSub.acquireFiles(unmergedFileA)
    self.mainCleanupSub.completeFiles(unmergedFileB)

    cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
    cleanupJobGroup.create()
    self.cleanupJobA = Job(name="CleanupJobA")
    self.cleanupJobA["state"] = "new"
    self.cleanupJobA["location"] = "site1"
    self.cleanupJobB = Job(name="CleanupJobB")
    self.cleanupJobB["state"] = "executing"
    self.cleanupJobB["location"] = "site1"
    self.cleanupJobC = Job(name="CleanupJobC")
    self.cleanupJobC["state"] = "complete"
    self.cleanupJobC["location"] = "site1"
    self.cleanupJobA.create(cleanupJobGroup)
    self.cleanupJobB.create(cleanupJobGroup)
    self.cleanupJobC.create(cleanupJobGroup)

    jobList = [
        self.procJobA, self.procJobB, self.procJobC, self.mergeJobA,
        self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB,
        self.cleanupJobC
    ]

    # persist the per-job states set above
    changeStateAction.execute(jobList)

    if baAPI:
        # register the jobs with the batch system API as well
        for job in jobList:
            job['plugin'] = 'TestPlugin'
            job['userdn'] = userDN
            job['usergroup'] = 'DEFAULT'
            job['userrole'] = 'DEFAULT'
            job['custom']['location'] = 'site1'
        baAPI.createNewJobs(wmbsJobs=jobList)

    # We'll create an unrelated workflow to verify that it isn't affected
    # by the killing code.
    bogusFileset = Fileset("dontkillme")
    bogusFileset.create()

    bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
    bogusFileA.create()
    bogusFileset.addFile(bogusFileA)
    bogusFileset.commit()

    bogusWorkflow = Workflow(spec="spec2",
                             owner="Steve",
                             name="Bogus",
                             task="Proc")
    bogusWorkflow.create()
    self.bogusSub = Subscription(fileset=bogusFileset,
                                 workflow=bogusWorkflow,
                                 type="Processing")
    self.bogusSub.create()
    self.bogusSub.acquireFiles(bogusFileA)
    return
def testStoreResults(self):
    """
    _testStoreResults_

    Create a StoreResults workflow and verify it installs into WMBS
    correctly: output map of the StoreResults workflow, merged/unmerged
    fileset names, log archive filesets and the top-level subscription
    type and split algorithm.
    """
    # Build the workload from the factory's default test arguments.
    wfArgs = StoreResultsWorkloadFactory.getTestArguments()
    wfArgs.update({'CmsPath': "/uscmst1/prod/sw/cms"})

    wlFactory = StoreResultsWorkloadFactory()
    workload = wlFactory.factoryWorkloadConstruction("TestWorkload", wfArgs)

    # Inject the workload into WMBS.
    helper = WMBSHelper(workload,
                        "StoreResults",
                        "SomeBlock",
                        cachepath=self.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask,
                                      helper.topLevelFileset)

    storeResultsWF = Workflow(name="TestWorkload",
                              task="/TestWorkload/StoreResults")
    storeResultsWF.load()
    self.assertEqual(len(storeResultsWF.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # Both the merged and unmerged fileset of each golden output module
    # point at the merged-* fileset.
    for modName in ["Merged"]:
        outputEntry = storeResultsWF.outputMap[modName][0]
        mergedFS = outputEntry["merged_output_fileset"]
        unmergedFS = outputEntry["output_fileset"]
        mergedFS.loadData()
        unmergedFS.loadData()

        expectedName = "/TestWorkload/StoreResults/merged-%s" % modName
        self.assertEqual(
            mergedFS.name, expectedName,
            "Error: Merged output fileset is wrong: %s" % mergedFS.name)
        self.assertEqual(
            unmergedFS.name, expectedName,
            "Error: Unmerged output fileset is wrong: %s." % unmergedFS.name)

    # Log archive filesets behave the same way.
    logArchEntry = storeResultsWF.outputMap["logArchive"][0]
    logArchFS = logArchEntry["merged_output_fileset"]
    unmergedLogArchFS = logArchEntry["output_fileset"]
    logArchFS.loadData()
    unmergedLogArchFS.loadData()

    self.assertEqual(logArchFS.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchFS.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # The top-level subscription is a parentless merge over the block.
    blockFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    blockFileset.loadData()

    topSub = Subscription(fileset=blockFileset, workflow=storeResultsWF)
    topSub.loadData()

    self.assertEqual(topSub["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(topSub["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def _createSubscriptionsInWMBS(self, task, fileset, alternativeFilesetClose=False):
    """
    __createSubscriptionsInWMBS_

    Create subscriptions in WMBS for all the tasks in the spec.  This
    includes filesets, workflows and the output map for each task.

    Recursive: called once for the top-level task/fileset, then calls
    itself for every child task feeding off each output module's fileset.

    :param task: the WMTask to create a workflow/subscription for.
    :param fileset: the WMBS Fileset the subscription reads from.
    :param alternativeFilesetClose: passed through to the Workflow and to
                                    recursive calls.
    :returns: the top-level subscription (the one created by the outermost
              call).
    """
    # create runtime sandbox for workflow
    self.createSandbox()

    # FIXME: Let workflow put in values if spec is missing them
    workflow = Workflow(
        spec=self.wmSpec.specUrl(),
        owner=self.wmSpec.getOwner()["name"],
        dn=self.wmSpec.getOwner().get("dn", "unknown"),
        group=self.wmSpec.getOwner().get("group", "unknown"),
        owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
        owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"),
        name=self.wmSpec.name(),
        task=task.getPathName(),
        wfType=self.wmSpec.getDashboardActivity(),
        alternativeFilesetClose=alternativeFilesetClose,
        priority=self.wmSpec.priority())
    workflow.create()
    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                split_algo=task.jobSplittingAlgorithm(),
                                type=task.getPrimarySubType())
    # reuse an existing subscription rather than creating a duplicate
    if subscription.exists():
        subscription.load()
        msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
        self.logger.info(msg % (subscription['id'], task.getPathName()))
    else:
        subscription.create()

    for site in task.siteWhitelist():
        subscription.addWhiteBlackList([{"site_name": site, "valid": True}])

    for site in task.siteBlacklist():
        subscription.addWhiteBlackList([{"site_name": site, "valid": False}])

    # the first subscription created in the recursion is the top-level one
    if self.topLevelSubscription is None:
        self.topLevelSubscription = subscription
        logging.info("Top level subscription created: %s",
                     subscription["id"])
    else:
        logging.info("Child subscription created: %s", subscription["id"])

    outputModules = task.getOutputModulesForTask()
    ignoredOutputModules = task.getIgnoredOutputModulesForTask()
    for outputModule in outputModules:
        for outputModuleName in outputModule.listSections_():
            if outputModuleName in ignoredOutputModules:
                logging.info(
                    "IgnoredOutputModule set for %s, skipping fileset creation.",
                    outputModuleName)
                continue
            dataTier = getattr(getattr(outputModule, outputModuleName),
                               "dataTier", '')
            filesetName = self.outputFilesetName(task, outputModuleName,
                                                 dataTier)
            outputFileset = Fileset(filesetName)
            outputFileset.create()
            outputFileset.markOpen(True)
            mergedOutputFileset = None

            for childTask in task.childTaskIterator():
                if childTask.data.input.outputModule == outputModuleName:
                    childDatatier = getattr(childTask.data.input, 'dataTier',
                                            '')
                    # Cleanup/Merge children for a different data tier do
                    # not consume this output module
                    if childTask.taskType() in [
                            "Cleanup", "Merge"
                    ] and childDatatier != dataTier:
                        continue
                    elif childTask.taskType(
                    ) == "Merge" and childDatatier == dataTier:
                        # a matching merge child defines the merged fileset
                        # for this output module
                        filesetName = self.outputFilesetName(
                            childTask, "Merged", dataTier)
                        mergedOutputFileset = Fileset(filesetName)
                        mergedOutputFileset.create()
                        mergedOutputFileset.markOpen(True)

                        primaryDataset = getattr(
                            getattr(outputModule, outputModuleName),
                            "primaryDataset", None)
                        if primaryDataset is not None:
                            # remember fileset id -> primary dataset for
                            # callers (e.g. reco release config)
                            self.mergeOutputMapping[
                                mergedOutputFileset.id] = primaryDataset

                    self._createSubscriptionsInWMBS(childTask, outputFileset,
                                                    alternativeFilesetClose)

            # without a merge child, the unmerged fileset doubles as the
            # merged one in the output map
            if mergedOutputFileset is None:
                workflow.addOutput(outputModuleName + dataTier, outputFileset,
                                   outputFileset)
            else:
                workflow.addOutput(outputModuleName + dataTier, outputFileset,
                                   mergedOutputFileset)

    return self.topLevelSubscription
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                    taskType='Processing', name=None, wfPrio=1, changeState=None):
    """
    _createJobGroups_

    Build one workflow and ``nSubs`` fileset/subscription/job-group
    triplets, each populated with ``nJobs`` jobs via makeNJobs().

    ``changeState`` may be a ChangeState instance; when given, every
    created job is moved from 'new' to 'created'.
    """
    if name is None:
        name = makeUUID()

    workflow = Workflow(spec=workloadSpec, owner="tapas", name=name,
                        task="basicWorkload/Production", priority=wfPrio)
    workflow.create()

    groups = []
    for _ in range(nSubs):
        # Fresh UUID names each fileset and its batch of jobs.
        batchName = makeUUID()

        batchFileset = Fileset(name=batchName)
        batchFileset.create()
        batchSubscription = Subscription(fileset=batchFileset,
                                         workflow=workflow,
                                         type=taskType,
                                         split_algo="FileBased")
        batchSubscription.create()
        batchGroup = JobGroup(subscription=batchSubscription)
        batchGroup.create()

        self.makeNJobs(name=batchName, task=task, nJobs=nJobs,
                       jobGroup=batchGroup, fileset=batchFileset,
                       sub=batchSubscription.exists(), site=site)

        batchFileset.commit()
        batchGroup.commit()
        groups.append(batchGroup)

    if changeState:
        for group in groups:
            changeState.propagate(group.jobs, 'created', 'new')

    return groups
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.  A third, multi-site fileset/subscription is
    also built for location-aware splitting tests.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    for siteName, seName in (("site1", "somese.cern.ch"),
                             ("site2", "otherse.cern.ch")):
        locationAction.execute(siteName=siteName, seName=seName)

    # Fileset 1: ten files at one site, each a child of the same parent.
    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    parentFile = File('/parent/lfn/', size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for runNumber in range(10):
        child = File(makeUUID(), size=1000, events=100,
                     locations=set(["somese.cern.ch"]))
        child.addRun(Run(runNumber, *[45]))
        child.create()
        child.addParent(lfn=parentFile['lfn'])
        self.multipleFileFileset.addFile(child)
    self.multipleFileFileset.commit()

    # Fileset 2: exactly one file.
    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    onlyFile = File("/some/file/name", size=1000, events=100,
                    locations=set(["somese.cern.ch"]))
    onlyFile.create()
    self.singleFileFileset.addFile(onlyFile)
    self.singleFileFileset.commit()

    # Fileset 3: five single-site files plus five dual-site files.
    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for _ in range(5):
        oneSiteFile = File(makeUUID(), size=1000, events=100,
                           locations=set(["somese.cern.ch"]))
        oneSiteFile.create()
        self.multipleSiteFileset.addFile(oneSiteFile)
    for _ in range(5):
        twoSiteFile = File(makeUUID(), size=1000, events=100,
                           locations=set(["otherse.cern.ch", "somese.cern.ch"]))
        twoSiteFile.create()
        self.multipleSiteFileset.addFile(twoSiteFile)
    self.multipleSiteFileset.commit()

    # One workflow feeds all three subscriptions.
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                 workflow=testWorkflow,
                                                 split_algo="FileBased",
                                                 type="Processing")
    self.multipleFileSubscription.create()

    self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                               workflow=testWorkflow,
                                               split_algo="FileBased",
                                               type="Processing")
    self.singleFileSubscription.create()

    self.multipleSiteSubscription = Subscription(fileset=self.multipleSiteFileset,
                                                 workflow=testWorkflow,
                                                 split_algo="FileBased",
                                                 type="Processing")
    self.multipleSiteSubscription.create()

    self.performanceParams = {'timePerEvent': 12,
                              'memoryRequirement': 2300,
                              'sizePerEvent': 400}
    return
def load(self):
    """
    _load_

    Load a workflow from WMBS. One of the following must be provided:
      - The workflow ID
      - The workflow name and task
      - The workflow spec and owner and task

    Populates this object's attributes (id, spec, name, owner, dn, group,
    vogroup, vorole, task, wfType) and rebuilds self.outputMap, a dict of
    output identifier -> list of {"output_fileset", "merged_output_fileset"}
    (merged fileset may be None).
    """
    existingTransaction = self.beginTransaction()

    # Pick the DAO by which identifying fields are set, in priority order:
    # ID, then name+task, then spec+dn+task.
    if self.id > 0:
        action = self.daofactory(classname="Workflow.LoadFromID")
        result = action.execute(workflow=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())
    elif self.name is not None:
        action = self.daofactory(classname="Workflow.LoadFromName")
        result = action.execute(workflow=self.name, task=self.task,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())
    else:
        action = self.daofactory(classname="Workflow.LoadFromSpecOwner")
        result = action.execute(spec=self.spec, dn=self.dn, task=self.task,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    self.id = result["id"]
    self.spec = result["spec"]
    self.name = result["name"]
    self.owner = result["owner"]
    self.dn = result["dn"]
    self.group = result["grp"]
    # Bug fix: these two assignments were crossed (vorole was read from
    # "vogrp" and vogroup from "vorole"); map each column to its own field.
    self.vogroup = result["vogrp"]
    self.vorole = result["vorole"]
    self.task = result["task"]
    self.wfType = result["type"]

    action = self.daofactory(classname="Workflow.LoadOutput")
    results = action.execute(workflow=self.id, conn=self.getDBConn(),
                             transaction=self.existingTransaction())

    self.outputMap = {}
    for outputID in results:
        for outputMap in results[outputID]:
            outputFileset = Fileset(id=outputMap["output_fileset"])
            # A NULL merged fileset means the output is its own merged form.
            if outputMap["merged_output_fileset"] is not None:
                mergedOutputFileset = Fileset(id=outputMap["merged_output_fileset"])
            else:
                mergedOutputFileset = None

            # setdefault replaces the removed-in-Python-3 dict.has_key idiom.
            self.outputMap.setdefault(outputID, []).append(
                {"output_fileset": outputFileset,
                 "merged_output_fileset": mergedOutputFileset})

    self.commitTransaction(existingTransaction)
    return
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.

    Builds a ReReco workload with one skim and harvesting enabled, injects
    it through WMBSHelper, then checks every workflow's output map and every
    subscription's type/split algorithm (processing, merges, cleanups, log
    collects, skims, and DQM harvesting).
    """
    # --- build the workload ------------------------------------------------
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = recoConfig

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # Spot-check that the processing string made it into the merged LFN base.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput. \
                         tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                         Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    # --- inject into WMBS --------------------------------------------------
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # --- processing workflow output map ------------------------------------
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing")
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["RECOoutput", "DQMoutput"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    # logArchive of the processing task has no merge step: both entries
    # point at the unmerged fileset.
    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- merge workflows for each processing output -------------------------
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- top-level processing subscription ----------------------------------
    topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()

    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    # --- merge subscriptions -------------------------------------------------
    unmergedReco = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    unmergedDqm = Fileset(name="/TestWorkload/DataProcessing/unmerged-DQMoutput")
    unmergedDqm.loadData()
    dqmMergeWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
    dqmMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedDqm, workflow=dqmMergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # --- cleanup subscriptions -----------------------------------------------
    for procOutput in ["RECOoutput", "DQMoutput"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # --- log-collect subscriptions --------------------------------------------
    procLogCollect = Fileset(name="/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    procLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # NOTE(review): this block is an exact duplicate of the RECOoutput
    # log-collect check above; it was presumably meant to test the
    # DataProcessingMergeDQMoutput log collect instead — verify intent.
    procLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # --- skim workflow output map ----------------------------------------------
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- skim merge workflows ----------------------------------------------------
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- skim subscription (reads the merged RECO output) --------------------------
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
    topLevelFileset.loadData()

    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # --- skim merge subscriptions ----------------------------------------------------
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # --- skim cleanup subscriptions -----------------------------------------------------
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # --- skim merge log-collect subscriptions ---------------------------------------------
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (
                                                   skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # --- DQM harvesting subscription -------------------------------------------------------
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()

    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
    topLevelFileset.loadData()

    dqmSubscription = Subscription(fileset=topLevelFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()

    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()

    logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                 workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    return
def createTestJobGroup(self, config, name="TestWorkthrough",
                       filesetName="TestFileset",
                       specLocation="spec.xml", error=False,
                       task="/TestWorkload/ReReco",
                       type="Processing"):
    """
    Creates a group of several jobs

    Builds a workflow + fileset + subscription, fills a job group with
    self.nJobs jobs, then drives every job through the full retry state
    machine (created -> executing -> complete -> jobfailed -> cooloff ->
    retry -> exhausted -> cleanout) using ChangeState.

    :param config: component configuration handed to ChangeState
    :param name: workflow name (also marked as injected)
    :param filesetName: name of the input fileset; output fileset gets
        the '<filesetName>Output' name
    :param specLocation: spec file path stored on the workflow
    :param error: when True, half the jobs load a "bad" FWJR pickle
    :param task: task path for the workflow
    :param type: subscription type (parameter shadows the builtin `type`;
        kept for caller compatibility)
    :return: the committed JobGroup
    """
    myThread = threading.currentThread()  # NOTE(review): unused local — candidate for removal

    testWorkflow = Workflow(spec=specLocation, owner=self.OWNERDN, name=name,
                            task=task, owner_vogroup="", owner_vorole="")
    testWorkflow.create()
    self.inject.execute(names=[name], injected=True)

    testWMBSFileset = Fileset(name=filesetName)
    testWMBSFileset.create()

    # Two input files, both at 'malpaquet'.
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12314]))
    testFileB.setLocation('malpaquet')

    testFileA.create()
    testFileB.create()

    testWMBSFileset.addFile(testFileA)
    testWMBSFileset.addFile(testFileB)
    testWMBSFileset.commit()
    testWMBSFileset.markOpen(0)

    # Closed output fileset holding one file, registered on the workflow.
    outputWMBSFileset = Fileset(name='%sOutput' % filesetName)
    outputWMBSFileset.create()
    testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
    testFileC.addRun(Run(10, *[12312]))
    testFileC.setLocation('malpaquet')
    testFileC.create()
    outputWMBSFileset.addFile(testFileC)
    outputWMBSFileset.commit()
    outputWMBSFileset.markOpen(0)

    testWorkflow.addOutput('output', outputWMBSFileset)

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type=type)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    for i in range(0, self.nJobs):
        testJob = Job(name=makeUUID())
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
        testJobGroup.add(testJob)

    testJobGroup.commit()

    changer = ChangeState(config)

    # Pick the framework job reports to attach: a failing report for the
    # first half when error=True, otherwise a merge report.
    report1 = Report()
    report2 = Report()
    if error:
        path1 = os.path.join(WMCore.WMBase.getTestBase(),
                             "WMComponent_t/JobAccountant_t/fwjrs", "badBackfillJobReport.pkl")
        path2 = os.path.join(WMCore.WMBase.getTestBase(),
                             'WMComponent_t/TaskArchiver_t/fwjrs', 'logCollectReport2.pkl')
    else:
        path1 = os.path.join(WMCore.WMBase.getTestBase(),
                             'WMComponent_t/TaskArchiver_t/fwjrs', 'mergeReport1.pkl')
        path2 = os.path.join(WMCore.WMBase.getTestBase(),
                             'WMComponent_t/TaskArchiver_t/fwjrs', 'logCollectReport2.pkl')
    report1.load(filename=path1)
    report2.load(filename=path2)

    # First pass through the state machine.
    changer.propagate(testJobGroup.jobs, 'created', 'new')
    changer.propagate(testJobGroup.jobs, 'executing', 'created')
    changer.propagate(testJobGroup.jobs, 'complete', 'executing')

    # First half of the jobs get report1, the rest report2.
    for i in range(self.nJobs):
        if i < self.nJobs / 2:
            testJobGroup.jobs[i]['fwjr'] = report1
        else:
            testJobGroup.jobs[i]['fwjr'] = report2

    # Fail, cool off, retry once, fail again, then exhaust and clean out.
    changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
    changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
    changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
    changer.propagate(testJobGroup.jobs, 'executing', 'created')
    changer.propagate(testJobGroup.jobs, 'complete', 'executing')
    changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
    changer.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
    changer.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
    changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

    testSubscription.completeFiles([testFileA, testFileB])

    return testJobGroup
def createTestJobGroup(self, nJobs=10, retry_count=1, workloadPath='test',
                       fwjrPath=None, workloadName=None, fileModifier=''):
    """
    Creates a group of several jobs

    Builds a workflow/fileset/subscription, three files (one parent, two
    children), and ``nJobs`` jobs with cache directories under
    self.testDir; the two child files are acquired by the subscription.

    :param nJobs: number of jobs to create
    :param retry_count: retry_count stored on each job
    :param workloadPath: spec path stored on the workflow
    :param fwjrPath: fwjr_path stored on each job (may be None)
    :param workloadName: workflow name; defaults to a fresh UUID per call.
        Bug fix: the old default ``workloadName=makeUUID()`` was evaluated
        once at import time, so every default invocation silently shared
        the same name — now a None sentinel generates a new UUID each call.
    :param fileModifier: suffix appended to every LFN to keep them unique
        across invocations
    :return: the committed JobGroup
    """
    if workloadName is None:
        workloadName = makeUUID()

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops",
                            group="cmsdataops", name=workloadName,
                            task="/TestWorkload/ReReco")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # One parent file plus two unmerged children at T2_CH_CERN.
    testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier,
                     size=1024, events=10)
    testFile0.addRun(Run(10, *[12312]))
    testFile0.setLocation('T2_CH_CERN')

    testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier,
                     size=1024, events=10, first_event=88, merged=False)
    testFileA.addRun(Run(10, *[12312, 12313]))
    testFileA.setLocation('T2_CH_CERN')

    testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier,
                     size=1024, events=10, first_event=88, merged=False)
    testFileB.addRun(Run(10, *[12314, 12315, 12316]))
    testFileB.setLocation('T2_CH_CERN')

    testFile0.create()
    testFileA.create()
    testFileB.create()

    testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier)
    testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier)

    for i in range(0, nJobs):
        testJob = Job(name=makeUUID())
        testJob['retry_count'] = retry_count
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
        testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
        # Each job gets its own cache dir so the retry machinery can
        # read/write per-job state there.
        testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
        testJob['fwjr_path'] = fwjrPath
        os.mkdir(testJob['cache_dir'])
        testJobGroup.add(testJob)
        testJob.create(group=testJobGroup)
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob.save()

    testJobGroup.commit()

    testSubscription.acquireFiles(files=[testFileA, testFileB])
    testSubscription.save()
    myThread.transaction.commit()

    return testJobGroup
def test_AutoIncrementCheck(self):
    """
    _AutoIncrementCheck_

    Test and see whether we can find and set the auto_increment values
    """
    myThread = threading.currentThread()
    # The AutoIncrementCheck DAO is MySQL-only; silently skip elsewhere.
    if myThread.dialect.lower() != 'mysql':
        return

    workflow = Workflow(spec="spec.xml", owner="Steve",
                        name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    fileA = File(lfn=makeUUID(), locations="test.site.ch")
    fileB = File(lfn=makeUUID(), locations="test.site.ch")
    fileA.create()
    fileB.create()

    fileset.addFile([fileA, fileB])
    fileset.commit()
    subscription.acquireFiles([fileA, fileB])

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")

    # Each pass runs the increment check (optionally seeding a floor
    # value), creates a job, and verifies the id the database assigned.
    for seed, expectedId in ((None, 1), (None, 2), (10, 11), (5, 12)):
        if seed is None:
            incrementDAO.execute()
        else:
            incrementDAO.execute(input=seed)
        newJob = Job()
        newJob.create(group=jobGroup)
        self.assertEqual(newJob.exists(), expectedId)

    return