def testMonteCarlo(self):
    """
    Build a Monte Carlo workload, inject it into WMBS and run the
    detailed common Monte Carlo checks on the result.
    """
    args = getTestArguments()
    args.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ConfigCacheID": self.injectMonteCarloConfig(),
    })

    workload = monteCarloWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
    return
def testRelValMCWithPileup(self):
    """
    Build a Monte Carlo workload whose configuration includes pileup
    input files, inject it into WMBS and run the common detailed checks.
    """
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "rereco_t"
    args["ProcConfigCacheID"] = self.injectMonteCarloConfig()
    # Pileup configuration: two MC datasets plus one data dataset.
    args["PileupConfig"] = {"mc": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
                            "data": ["/some/minbias/dataset3"]}

    workload = monteCarloWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
    return
def testRelValMC(self):
    """
    Configure and instantiate a RelValMC workload, install it into WMBS
    and check that the subscriptions in WMBS are set up correctly.
    """
    args = getTestArguments()
    args.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "relvalmc_t",
        "GenOutputModuleName": "OutputA",
        "StepOneOutputModuleName": "OutputB",
        "GenConfigCacheID": self.injectGenerationConfig(),
        "StepOneConfigCacheID": self.injectStepOneConfig(),
        "StepTwoConfigCacheID": self.injectStepTwoConfig(),
    })

    workload = relValMCWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Generation", "SomeBlock", cachepath=self.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    # A single workload instance is installed into WMBS; each subtest
    # below deals with the subscriptions of a specific task.
    self._generationTaskTest()
    self._stepTwoTaskTest()
    self._stepOneTaskTest()
    return
def testRelValMCWithPileup(self):
    """
    Create a Monte Carlo workflow with a pileup configuration, verify it
    is injected into WMBS and invoke the common detailed test.
    """
    args = getTestArguments()
    args.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ConfigCacheID": self.injectMonteCarloConfig(),
    })
    # Pileup configuration: two MC datasets plus one data dataset.
    args["PileupConfig"] = {"mc": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
                            "data": ["/some/minbias/dataset3"]}

    workload = monteCarloWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
    return
def testThreeStepChainedReDigi(self):
    """
    _testThreeStepChaninedReDigi_

    Verify that a chained ReDigi workflow that discards RAW and RECO data
    can be created and installed into WMBS correctly.
    """
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "redigi_t"
    configs = self.injectReDigiConfigs()
    args["StepOneConfigCacheID"] = configs[0]
    args["StepTwoConfigCacheID"] = configs[1]
    args["StepThreeConfigCacheID"] = configs[2]
    # Drop the output of the first two steps.
    args["KeepStepOneOutput"] = False
    args["KeepStepTwoOutput"] = False

    workload = reDigiWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    self.assertTrue(len(workload.getTopLevelTask()) == 1,
                    "Error: Wrong number of top level tasks.")
    topTask = workload.getTopLevelTask()[0]
    cmsRun2 = topTask.steps().getStep("cmsRun2").getTypeHelper()
    self.assertTrue(len(cmsRun2.listOutputModules()) == 2,
                    "Error: Wrong number of output modules in cmsRun2.")

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)
    self.verifyKeepAOD()
    return
def testCombinedReDigiRecoConfig(self):
    """
    _testCombinedReDigiRecoConfig_

    Verify that a ReDigi workflow that uses a single step one config
    installs into WMBS correctly.
    """
    args = ReDigiWorkloadFactory.getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "redigi_t"
    configs = injectReDigiConfigs(self.configDatabase, combinedStepOne=True)
    args["StepOneConfigCacheID"] = configs[0]
    args["StepTwoConfigCacheID"] = configs[2]
    args["StepOneOutputModuleName"] = "RECODEBUGoutput"

    workload = ReDigiWorkloadFactory().factoryWorkloadConstruction("TestWorkload", args)

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)
    self.verifyDiscardRAW()
    return
def testMCWithLHE(self):
    """
    _testMCWithLHE_

    Create a MonteCarlo workflow that reads .lhe input files and produces
    more than one lumi per job (the former LHEStepZero behaviour), then
    verify the stored job splitting parameters.
    """
    args = MonteCarloWorkloadFactory.getTestArguments()
    args.update({"CouchURL": os.environ["COUCHURL"],
                 "CouchDBName": TEST_DB_NAME,
                 "ConfigCacheID": self.injectMonteCarloConfig(),
                 "LheInputFiles": "True",
                 "EventsPerJob": 200,
                 "EventsPerLumi": 50})

    workload = MonteCarloWorkloadFactory().factoryWorkloadConstruction("TestWorkload", args)

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()

    splitting = workload.getTaskByPath('/TestWorkload/Production').jobSplittingParameters()
    self.assertEqual(splitting["events_per_job"], 200)
    self.assertEqual(splitting["events_per_lumi"], 50)
    self.assertEqual(splitting["lheInputFiles"], True)
    self.assertFalse(splitting["deterministicPileup"])
    return
def testSingleStepReDigi(self):
    """
    _testSingleStepReDigi_

    Verify that a single step ReDigi workflow can be created and
    installed correctly into WMBS.
    """
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "redigi_t"
    configs = self.injectReDigiConfigs()
    # Only the third injected config is used; steps two and three are off.
    args["StepOneConfigCacheID"] = configs[2]
    args["StepTwoConfigCacheID"] = None
    args["StepThreeConfigCacheID"] = None

    workload = reDigiWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)
    self.verifyKeepAOD()
    return
def testRelValMCWithPileup(self):
    """
    Configure and instantiate a RelValMC workload with pileup for the
    generation task, install it into WMBS and check the subscriptions.
    """
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "relvalmc_t"
    args["GenOutputModuleName"] = "OutputA"
    args["StepOneOutputModuleName"] = "OutputB"
    args["GenConfigCacheID"] = self.injectGenerationConfig()
    args["StepOneConfigCacheID"] = self.injectStepOneConfig()
    args["StepTwoConfigCacheID"] = self.injectStepTwoConfig()
    # Pile up information - for the generation task.
    args["PileupConfig"] = {"cosmics": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
                            "minbias": ["/some/minbias/dataset3"]}

    workload = relValMCWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Generation", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    # A single workload instance is installed into WMBS; each subtest
    # below deals with the subscriptions of a specific task.
    self._generationTaskTest()
    self._stepOneTaskTest()
    self._stepTwoTaskTest()
def testChainedReDigi(self):
    """
    _testChaninedReDigi_

    Verify that a chained ReDigi workflow that discards RAW data can be
    created and installed into WMBS correctly.  Only the step one/step
    two information in WMBS is verified here; the step three information
    is the same as the dependent workflow.
    """
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "redigi_t"
    configs = self.injectReDigiConfigs()
    args["StepOneConfigCacheID"] = configs[0]
    args["StepTwoConfigCacheID"] = configs[1]
    args["StepThreeConfigCacheID"] = configs[2]
    args["KeepStepOneOutput"] = False

    workload = reDigiWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)
    self.verifyDiscardRAW()
    return
def testMonteCarlo(self):
    """
    _testMonteCarlo_

    Create a Monte Carlo workflow, verify that it is injected correctly
    into WMBS and invoke its detailed test.
    """
    args = MonteCarloWorkloadFactory.getTestArguments()
    args.update({"CouchURL": os.environ["COUCHURL"],
                 "CouchDBName": TEST_DB_NAME,
                 "ConfigCacheID": self.injectMonteCarloConfig()})

    workload = MonteCarloWorkloadFactory().factoryWorkloadConstruction("TestWorkload", args)

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
    return
def testStoreResults(self):
    """
    _testStoreResults_

    Create a StoreResults workflow and verify it installs into WMBS
    correctly.
    """
    arguments = StoreResultsWorkloadFactory.getTestArguments()
    arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

    factory = StoreResultsWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    # Install the workload into WMBS: top level fileset plus subscriptions.
    testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    testWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults")
    testWorkflow.load()

    # Expect exactly two workflow outputs: "Merged" and "logArchive".
    self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["Merged"]
    for goldenOutputMod in goldenOutputMods:
        # Both the merged and unmerged filesets point at the merged LFN.
        mergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

    logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # The top level subscription must be a ParentlessMergeBySize merge.
    topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def testCombinedReDigiRecoConfig(self):
    """
    _testCombinedReDigiRecoConfig_

    Verify that a ReDigi workflow that uses a single step one config
    installs into WMBS correctly.
    """
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "redigi_t"
    configs = self.injectReDigiConfigs(combinedStepOne=True)
    args["StepOneConfigCacheID"] = configs[0]
    args["StepTwoConfigCacheID"] = configs[2]
    args["StepThreeConfigCacheID"] = None
    args["StepOneOutputModuleName"] = "RECODEBUGoutput"

    workload = reDigiWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)
    self.verifyDiscardRAW()
    return
def testMCWithPileup(self):
    """
    _testMCWithPileup_

    Create a Monte Carlo workflow whose configuration includes pileup
    input files, inject it into WMBS, run the common detailed test and
    check that the pileup and splitting settings were propagated.
    """
    args = MonteCarloWorkloadFactory.getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = TEST_DB_NAME
    args["ConfigCacheID"] = self.injectMonteCarloConfig()
    # Add pileup inputs.
    args["MCPileup"] = COSMICS_PU
    args["DataPileup"] = DATA_PU
    args["DeterministicPileup"] = True

    workload = MonteCarloWorkloadFactory().factoryWorkloadConstruction("TestWorkload", args)

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()

    productionTask = workload.getTaskByPath('/TestWorkload/Production')
    pileupData = productionTask.getStep("cmsRun1").getTypeHelper().getPileup()
    self.assertEqual(pileupData.data.dataset, [DATA_PU])
    self.assertEqual(pileupData.mc.dataset, [COSMICS_PU])
    self.assertTrue(productionTask.jobSplittingParameters()["deterministicPileup"])
    return
def testFilesets(self):
    """
    Test workflow tasks, filesets and subscriptions creation.
    """
    # Expected tasks, filesets, subscriptions, etc.
    expOutTasks = ['/TestWorkload/StoreResults']
    expWfTasks = ['/TestWorkload/StoreResults',
                  '/TestWorkload/StoreResults/StoreResultsLogCollect']
    expFsets = ['TestWorkload-StoreResults-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/StoreResults/merged-Merged',
                '/TestWorkload/StoreResults/merged-logArchive']
    # (subscription id, fileset name, workflow task, split algo, type)
    subMaps = [(2,
                '/TestWorkload/StoreResults/merged-logArchive',
                '/TestWorkload/StoreResults/StoreResultsLogCollect',
                'MinFileBased',
                'LogCollect'),
               (1,
                'TestWorkload-StoreResults-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/StoreResults',
                'ParentlessMergeBySize',
                'Merge')]

    testArguments = StoreResultsWorkloadFactory.getTestArguments()
    testArguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

    factory = StoreResultsWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

    # Install the workload into WMBS using the input dataset as block name.
    testWMBSHelper = WMBSHelper(testWorkload, "StoreResults",
                                blockName=testArguments['InputDataset'],
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    print("Tasks producing output:\n%s" % pformat(testWorkload.listOutputProducingTasks()))
    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    print("List of workflow tasks:\n%s" % pformat([item['task'] for item in workflows]))
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # listFilesets returns a tuple of id, name, open and last_update.
    filesets = self.listFilesets.execute()
    print("List of filesets:\n%s" % pformat([item[1] for item in filesets]))
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    print("List of subscriptions:\n%s" % pformat(subscriptions))
    self.assertItemsEqual(subscriptions, subMaps)
def testStoreResults(self):
    """
    _testStoreResults_

    Create a StoreResults workflow and verify it installs into WMBS
    correctly.
    """
    arguments = StoreResultsWorkloadFactory.getTestArguments()

    factory = StoreResultsWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    # Install the workload into WMBS: top level fileset plus subscriptions.
    testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    testWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults")
    testWorkflow.load()

    # Expect exactly two workflow outputs: merged data and logArchive.
    self.assertEqual(len(testWorkflow.outputMap), 2,
                     "Error: Wrong number of WF outputs.")

    # Output module name -> data tier; the outputMap key is their concatenation.
    goldenOutputMods = {"Merged": "USER"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        # Both the merged and unmerged filesets point at the merged LFN.
        mergedOutput = testWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = testWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % fset,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

    logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # The top level subscription must be a ParentlessMergeBySize merge.
    topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def testFilesets(self): """ Test workflow tasks, filesets and subscriptions creation """ # expected tasks, filesets, subscriptions, etc expOutTasks = [] expWfTasks = [ '/TestWorkload/EndOfRunDQMHarvest', '/TestWorkload/EndOfRunDQMHarvest/EndOfRunDQMHarvestLogCollect' ] expFsets = [ 'TestWorkload-EndOfRunDQMHarvest-/NoBPTX/Run2016F-23Sep2016-v1/DQMIO', '/TestWorkload/EndOfRunDQMHarvest/unmerged-logArchive' ] subMaps = [ (2, '/TestWorkload/EndOfRunDQMHarvest/unmerged-logArchive', '/TestWorkload/EndOfRunDQMHarvest/EndOfRunDQMHarvestLogCollect', 'MinFileBased', 'LogCollect'), (1, 'TestWorkload-EndOfRunDQMHarvest-/NoBPTX/Run2016F-23Sep2016-v1/DQMIO', '/TestWorkload/EndOfRunDQMHarvest', 'Harvest', 'Harvesting') ] testArguments = DQMHarvestWorkloadFactory.getTestArguments() testArguments.update(REQUEST) testArguments['DQMConfigCacheID'] = self.injectDQMHarvestConfig() testArguments.pop("ConfigCacheID", None) factory = DQMHarvestWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", testArguments) testWMBSHelper = WMBSHelper(testWorkload, "EndOfRunDQMHarvest", blockName=testArguments['InputDataset'], cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks) workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload") self.assertItemsEqual([item['task'] for item in workflows], expWfTasks) # returns a tuple of id, name, open and last_update filesets = self.listFilesets.execute() self.assertItemsEqual([item[1] for item in filesets], expFsets) subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True) self.assertItemsEqual(subscriptions, subMaps)
def testMonteCarloExtension(self):
    """
    Create a Monte Carlo workflow and verify that it is injected correctly
    into WMBS and invoke its detailed test.  This uses a non-zero first
    event and lumi.  Check that the splitting arguments are correctly set
    for the lfn counter.
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "rereco_t"
    defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()
    defaultArguments["FirstEvent"] = 3571428573
    defaultArguments["FirstLumi"] = 26042
    defaultArguments["TimePerEvent"] = 15
    defaultArguments["FilterEfficiency"] = 0.014
    defaultArguments["TotalTime"] = 28800
    # Same as the previous number of jobs + 1, which is the same value
    # as the first lumi.
    initial_lfn_counter = 26042

    testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    self._commonMonteCarloTest()

    # The production task must store the initial LFN counter...
    productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
    productionSplitting = productionTask.jobSplittingParameters()
    self.assertTrue("initial_lfn_counter" in productionSplitting,
                    "No initial lfn counter was stored")
    self.assertEqual(productionSplitting["initial_lfn_counter"], initial_lfn_counter,
                     "Wrong initial LFN counter")

    # ...and so must each of its merge tasks.
    for outputMod in ["OutputA", "OutputB"]:
        mergeTask = testWorkload.getTaskByPath(
            '/TestWorkload/Production/ProductionMerge%s' % outputMod)
        mergeSplitting = mergeTask.jobSplittingParameters()
        self.assertTrue("initial_lfn_counter" in mergeSplitting,
                        "No initial lfn counter was stored")
        self.assertEqual(mergeSplitting["initial_lfn_counter"], initial_lfn_counter,
                         "Wrong initial LFN counter")
    return
def testMonteCarloExtension(self):
    """
    _testMonteCarloExtension_

    Create a Monte Carlo workflow and verify that it is injected correctly
    into WMBS and invoke its detailed test.  This uses a non-zero first
    lumi.  Check that the splitting arguments are correctly set for the
    lfn counter.
    """
    defaultArguments = MonteCarloWorkloadFactory.getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = TEST_DB_NAME
    defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()
    defaultArguments["FirstLumi"] = 10001
    defaultArguments["EventsPerJob"] = 100
    defaultArguments["FirstEvent"] = 10001
    # EventsPerJob == EventsPerLumi, so the number of previous jobs is
    # equal to the number of the initial lumi.
    initial_lfn_counter = 100

    factory = MonteCarloWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

    testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    self._commonMonteCarloTest()

    # The production task and each of its merge tasks must all store the
    # same initial LFN counter.
    productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
    productionSplitting = productionTask.jobSplittingParameters()
    self.assertTrue("initial_lfn_counter" in productionSplitting,
                    "No initial lfn counter was stored")
    self.assertEqual(productionSplitting["initial_lfn_counter"], initial_lfn_counter,
                     "Wrong initial LFN counter")

    for outputMod in ["OutputA", "OutputB"]:
        mergeTask = testWorkload.getTaskByPath(
            '/TestWorkload/Production/ProductionMerge%s' % outputMod)
        mergeSplitting = mergeTask.jobSplittingParameters()
        self.assertTrue("initial_lfn_counter" in mergeSplitting,
                        "No initial lfn counter was stored")
        self.assertEqual(mergeSplitting["initial_lfn_counter"], initial_lfn_counter,
                         "Wrong initial LFN counter")
    return
def testMonteCarloExtension(self):
    """
    _testMonteCarloExtension_

    Create a Monte Carlo workflow and verify that it is injected correctly
    into WMBS and invoke its detailed test.  This uses a non-zero first
    lumi.  Check that the splitting arguments are correctly set for the
    lfn counter.
    """
    defaultArguments = MonteCarloWorkloadFactory.getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = TEST_DB_NAME
    defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()
    defaultArguments["FirstLumi"] = 10001
    defaultArguments["EventsPerJob"] = 100
    defaultArguments["FirstEvent"] = 10001
    # EventsPerJob == EventsPerLumi, so the number of previous jobs is
    # equal to the number of the initial lumi.
    initial_lfn_counter = 100

    factory = MonteCarloWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

    testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    self._commonMonteCarloTest()

    # The production task and each of its merge tasks must all store the
    # same initial LFN counter.
    productionTask = testWorkload.getTaskByPath("/TestWorkload/Production")
    productionSplitting = productionTask.jobSplittingParameters()
    self.assertTrue("initial_lfn_counter" in productionSplitting,
                    "No initial lfn counter was stored")
    self.assertEqual(productionSplitting["initial_lfn_counter"], initial_lfn_counter,
                     "Wrong initial LFN counter")

    for outputMod in ["OutputA", "OutputB"]:
        mergeTask = testWorkload.getTaskByPath(
            "/TestWorkload/Production/ProductionMerge%s" % outputMod)
        mergeSplitting = mergeTask.jobSplittingParameters()
        self.assertTrue("initial_lfn_counter" in mergeSplitting,
                        "No initial lfn counter was stored")
        self.assertEqual(mergeSplitting["initial_lfn_counter"], initial_lfn_counter,
                         "Wrong initial LFN counter")
    return
def testMonteCarloExtension(self):
    """
    Create a Monte Carlo workflow and verify that it is injected correctly
    into WMBS and invoke its detailed test.  This uses a non-zero first
    event and lumi.  Check that the splitting arguments are correctly set
    for the lfn counter.
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "rereco_t"
    defaultArguments["ProcConfigCacheID"] = self.injectMonteCarloConfig()
    defaultArguments["FirstEvent"] = 3571428573
    defaultArguments["FirstLumi"] = 26042
    defaultArguments["TimePerEvent"] = 15
    defaultArguments["FilterEfficiency"] = 0.014
    defaultArguments["TotalTime"] = 28800
    # Same as the previous number of jobs + 1, which is the same value
    # as the first lumi.
    initial_lfn_counter = 26042

    testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    self._commonMonteCarloTest()

    # The production task must store the initial LFN counter...
    productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
    productionSplitting = productionTask.jobSplittingParameters()
    self.assertTrue("initial_lfn_counter" in productionSplitting,
                    "No initial lfn counter was stored")
    self.assertEqual(productionSplitting["initial_lfn_counter"], initial_lfn_counter,
                     "Wrong initial LFN counter")

    # ...and so must each of its merge tasks.
    for outputMod in ["OutputA", "OutputB"]:
        mergeTask = testWorkload.getTaskByPath('/TestWorkload/Production/ProductionMerge%s' % outputMod)
        mergeSplitting = mergeTask.jobSplittingParameters()
        self.assertTrue("initial_lfn_counter" in mergeSplitting,
                        "No initial lfn counter was stored")
        self.assertEqual(mergeSplitting["initial_lfn_counter"], initial_lfn_counter,
                         "Wrong initial LFN counter")
    return
def testLHEStepZero(self):
    """
    _testLHEStepZero_

    Make sure that the workload can be created and complies with the
    common MonteCarlo test.
    """
    args = getTestArguments()
    args.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ConfigCacheID": self.injectMonteCarloConfig(),
    })

    workload = lheStepZeroWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
def testMonteCarlo(self):
    """
    Create a Monte Carlo workflow, inject it into WMBS and invoke its
    detailed test.
    """
    args = getTestArguments()
    args.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "ProdConfigCacheID": self.injectMonteCarloConfig(),
    })

    workload = monteCarloWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    helper = WMBSHelper(workload, "Production", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
def testFilesets(self): """ Test workflow tasks, filesets and subscriptions creation """ # expected tasks, filesets, subscriptions, etc expOutTasks = [] expWfTasks = ['/TestWorkload/EndOfRunDQMHarvest', '/TestWorkload/EndOfRunDQMHarvest/EndOfRunDQMHarvestLogCollect'] expFsets = ['TestWorkload-EndOfRunDQMHarvest-/NoBPTX/Run2016F-23Sep2016-v1/DQMIO', '/TestWorkload/EndOfRunDQMHarvest/unmerged-logArchive'] subMaps = [(2, '/TestWorkload/EndOfRunDQMHarvest/unmerged-logArchive', '/TestWorkload/EndOfRunDQMHarvest/EndOfRunDQMHarvestLogCollect', 'MinFileBased', 'LogCollect'), (1, 'TestWorkload-EndOfRunDQMHarvest-/NoBPTX/Run2016F-23Sep2016-v1/DQMIO', '/TestWorkload/EndOfRunDQMHarvest', 'Harvest', 'Harvesting')] testArguments = DQMHarvestWorkloadFactory.getTestArguments() testArguments.update(REQUEST) testArguments['DQMConfigCacheID'] = self.injectDQMHarvestConfig() testArguments.pop("ConfigCacheID", None) factory = DQMHarvestWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments) testWMBSHelper = WMBSHelper(testWorkload, "EndOfRunDQMHarvest", blockName=testArguments['InputDataset'], cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks) workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload") self.assertItemsEqual([item['task'] for item in workflows], expWfTasks) # returns a tuple of id, name, open and last_update filesets = self.listFilesets.execute() self.assertItemsEqual([item[1] for item in filesets], expFsets) subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True) self.assertItemsEqual(subscriptions, subMaps)
def testChainedReDigi(self):
    """
    _testChainedReDigi_

    Verify that a chained ReDigi workflow that discards RAW data can be
    created and installed into WMBS correctly.  This will only verify the
    step one/step two information in WMBS as the step three information is
    the same as the dependent workflow.
    """
    defaultArguments = ReDigiWorkloadFactory.getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = injectReDigiConfigs(self.configDatabase)
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput"
    defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput"
    defaultArguments["MCPileup"] = "/mixing/pileup/DATASET"
    # discard the RAW output from step one
    defaultArguments["KeepStepOneOutput"] = False

    factory = ReDigiWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

    # Verify that pileup is configured for both of the cmsRun steps in the
    # top level task.
    topLevelTask = testWorkload.getTopLevelTask()[0]
    cmsRun1Helper = topLevelTask.getStepHelper("cmsRun1")
    cmsRun2Helper = topLevelTask.getStepHelper("cmsRun2")
    cmsRun1PileupConfig = cmsRun1Helper.getPileup()
    cmsRun2PileupConfig = cmsRun2Helper.getPileup()
    # Bug fix: assertTrue(x, y) treats y as a failure *message*, not an
    # expected value, so the old calls passed for any truthy dataset.
    # Assert that a pileup dataset is actually configured for each step.
    self.assertTrue(cmsRun1PileupConfig.mc.dataset,
                    "Error: no MC pileup dataset configured for cmsRun1")
    self.assertTrue(cmsRun2PileupConfig.mc.dataset,
                    "Error: no MC pileup dataset configured for cmsRun2")

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    self.verifyDiscardRAW()
    return
def testChainedReDigi(self):
    """
    _testChainedReDigi_

    Verify that a chained ReDigi workflow that discards RAW data can be
    created and installed into WMBS correctly.  This will only verify the
    step one/step two information in WMBS as the step three information is
    the same as the dependent workflow.
    """
    defaultArguments = ReDigiWorkloadFactory.getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = injectReDigiConfigs(self.configDatabase)
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput"
    defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput"
    defaultArguments["MCPileup"] = PILEUP_DATASET
    # discard the RAW output from step one
    defaultArguments["KeepStepOneOutput"] = False

    factory = ReDigiWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

    # Verify that pileup is configured for both of the cmsRun steps in the
    # top level task.
    topLevelTask = testWorkload.getTopLevelTask()[0]
    cmsRun1Helper = topLevelTask.getStepHelper("cmsRun1")
    cmsRun2Helper = topLevelTask.getStepHelper("cmsRun2")
    cmsRun1PileupConfig = cmsRun1Helper.getPileup()
    cmsRun2PileupConfig = cmsRun2Helper.getPileup()
    # Bug fix: assertTrue(x, y) treats y as a failure *message*, not an
    # expected value, so the old calls passed for any truthy dataset.
    # Assert that a pileup dataset is actually configured for each step.
    self.assertTrue(cmsRun1PileupConfig.mc.dataset,
                    "Error: no MC pileup dataset configured for cmsRun1")
    self.assertTrue(cmsRun2PileupConfig.mc.dataset,
                    "Error: no MC pileup dataset configured for cmsRun2")

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    self.verifyDiscardRAW()
    return
def testMonteCarlo(self):
    """
    _testMonteCarlo_

    Construct a Monte Carlo workload through the factory, inject it into
    WMBS and run the shared Monte Carlo verification checks.
    """
    args = MonteCarloWorkloadFactory.getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["ConfigCacheID"] = self.injectMonteCarloConfig()

    testWorkload = MonteCarloWorkloadFactory().factoryWorkloadConstruction("TestWorkload", args)

    # Install the top level fileset and its subscription into WMBS.
    helper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
    return
def testChainedReDigi(self):
    """
    _testChainedReDigi_

    Verify that a chained ReDigi workflow that discards RAW data can be
    created and installed into WMBS correctly.  This will only verify the
    step one/step two information in WMBS as the step three information is
    the same as the dependent workflow.
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = self.injectReDigiConfigs()
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    # discard the RAW output from step one
    defaultArguments["KeepStepOneOutput"] = False

    testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    # Verify that pileup is configured for both of the cmsRun steps in the
    # top level task.
    topLevelTask = testWorkload.getTopLevelTask()[0]
    cmsRun1Helper = topLevelTask.getStepHelper("cmsRun1")
    cmsRun2Helper = topLevelTask.getStepHelper("cmsRun2")
    cmsRun1PileupConfig = cmsRun1Helper.getPileup()
    cmsRun2PileupConfig = cmsRun2Helper.getPileup()
    # Bug fix: assertTrue(x, y) treats y as a failure *message*, not an
    # expected value, so the old calls passed for any truthy dataset.
    # Assert that a pileup dataset is actually configured for each step.
    self.assertTrue(cmsRun1PileupConfig.mc.dataset,
                    "Error: no MC pileup dataset configured for cmsRun1")
    self.assertTrue(cmsRun2PileupConfig.mc.dataset,
                    "Error: no MC pileup dataset configured for cmsRun2")

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    self.verifyDiscardRAW()
    return
def testSingleStepReDigi(self):
    """
    _testSingleStepReDigi_

    Verify that a single step ReDigi workflow can be created and
    installed correctly into WMBS.
    """
    args = ReDigiWorkloadFactory.getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "redigi_t"
    # only step one is used; it runs the third injected config
    args["StepOneConfigCacheID"] = injectReDigiConfigs(self.configDatabase)[2]

    workload = ReDigiWorkloadFactory().factoryWorkloadConstruction("TestWorkload", args)

    helper = WMBSHelper(workload, "StepOneProc", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    self.verifyKeepAOD()
    return
def testLHEStepZero(self):
    """
    _testLHEStepZero_

    Make sure that the LHE step-zero workload can be created, installed
    into WMBS, and passes the common MonteCarlo verification.
    """
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "rereco_t"
    args["ConfigCacheID"] = self.injectMonteCarloConfig()

    workload = lheStepZeroWorkload("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DWMWM")

    # Install the top level fileset/subscription pair into WMBS.
    helper = WMBSHelper(workload, "Production", "SomeBlock",
                        cachepath=self.testInit.testDir)
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    self._commonMonteCarloTest()
def testThreeStepChainedReDigi(self):
    """
    _testThreeStepChainedReDigi_

    Verify that a chained ReDigi workflow that discards RAW and RECO data
    can be created and installed into WMBS correctly.
    """
    defaultArguments = ReDigiWorkloadFactory.getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = injectReDigiConfigs(self.configDatabase)
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    # discard both intermediate outputs so only the final output is kept
    defaultArguments["KeepStepOneOutput"] = False
    defaultArguments["KeepStepTwoOutput"] = False
    defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput"
    defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput"

    factory = ReDigiWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

    # Use assertEqual instead of assertTrue(a == b, msg) so a failure
    # reports the actual value rather than just the message.
    self.assertEqual(len(testWorkload.getTopLevelTask()), 1,
                     "Error: Wrong number of top level tasks.")
    topLevelTask = testWorkload.getTopLevelTask()[0]
    topLevelStep = topLevelTask.steps()
    cmsRun2Step = topLevelStep.getStep("cmsRun2").getTypeHelper()
    self.assertEqual(len(cmsRun2Step.listOutputModules()), 2,
                     "Error: Wrong number of output modules in cmsRun2.")

    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    self.verifyKeepAOD()
    return
def testPrivateMC(self):
    """
    _testAnalysis_

    Build a PrivateMC workload, install it into WMBS and verify the
    workflow output map, the top level subscription and the log-collect
    subscription.
    """
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "privatemc_t"
    defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    defaultArguments["ProcessingVersion"] = 1

    processingFactory = PrivateMCWorkloadFactory()
    testWorkload = processingFactory("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    testWMBSHelper = WMBSHelper(testWorkload, "PrivateMC", "SomeBlock", cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/PrivateMC")
    procWorkflow.load()
    # two output modules plus logArchive
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs: %s" % len(procWorkflow.outputMap.keys()))

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]  # Actually Analysis does not have a merge task
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    # with no merge task both filesets point at the unmerged logArchive
    self.assertEqual(logArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    goldenOutputMods = ["OutputA", "OutputB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # again no merge task, so "merged" resolves to the unmerged fileset
        self.assertEqual(mergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    # verify the top level (input block) subscription
    topLevelFileset = Fileset(name = "TestWorkload-PrivateMC-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "PrivateMC",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # verify the log-collect subscription on the unmerged logArchive
    procLogCollect = Fileset(name = "/TestWorkload/PrivateMC/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/PrivateMC/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def testStepChainMC(self):
    """
    Build a StepChain workload starting from scratch

    Reads the request from the JSON template, injects the three step
    configs, installs the workload into WMBS and then checks workload,
    task, splitting and per-step properties in detail.
    """
    # Read in the request
    request = json.load(open(self.jsonTemplate))
    testArguments = request['createRequest']
    testArguments.update({
        "CouchURL": os.environ["COUCHURL"],
        "ConfigCacheUrl": os.environ["COUCHURL"],
        "CouchDBName": "stepchain_t"
    })
    configDocs = injectStepChainConfigMC(self.configDatabase)
    for s in ['Step1', 'Step2', 'Step3']:
        testArguments[s]['ConfigCacheID'] = configDocs[s]

    factory = StepChainWorkloadFactory()

    # test that we cannot stage out different samples with the same output module
    self.assertRaises(WMSpecFactoryException, factory.factoryWorkloadConstruction,
                      "TestWorkload", testArguments)

    # dropping Step2's output resolves the clash; construction now succeeds
    testArguments['Step2']['KeepOutput'] = False
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

    testWMBSHelper = WMBSHelper(testWorkload, "ProdMinBias", "MCFakeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # test workload properties
    self.assertEqual(testWorkload.getDashboardActivity(), "production")
    self.assertEqual(testWorkload.getCampaign(), "Campaign-OVERRIDE-ME")
    self.assertEqual(testWorkload.getAcquisitionEra(), "CMSSW_7_0_0_pre11")
    self.assertEqual(testWorkload.getProcessingString(), "START70_V4")
    self.assertEqual(testWorkload.getProcessingVersion(), 1)
    self.assertEqual(testWorkload.getPrepID(), "Step-00")
    self.assertEqual(sorted(testWorkload.getCMSSWVersions()),
                     ['CMSSW_7_0_0_pre11', 'CMSSW_7_0_0_pre12'])

    # test workload attributes
    self.assertEqual(testWorkload.processingString, "START70_V4")
    self.assertEqual(testWorkload.acquisitionEra, "CMSSW_7_0_0_pre11")
    self.assertEqual(testWorkload.processingVersion, 1)
    self.assertFalse(testWorkload.lumiList, "Wrong lumiList")
    self.assertEqual(testWorkload.data.policies.start.policyName, "MonteCarlo")

    # test workload tasks and steps
    tasks = testWorkload.listAllTaskNames()
    self.assertEqual(len(tasks), 10)
    for t in ['ProdMinBias', 'ProdMinBiasMergeRAWSIMoutput',
              'RECOPROD1MergeAODSIMoutput', 'RECOPROD1MergeRECOSIMoutput']:
        self.assertTrue(t in tasks, "Wrong task name")
    # Step2's output was dropped, so no merge task for its AODSIM output
    self.assertFalse('ProdMinBiasMergeAODSIMoutput' in tasks, "Wrong task name")

    task = testWorkload.getTask(tasks[0])
    self.assertEqual(task.name(), "ProdMinBias")
    self.assertEqual(task.getPathName(), "/TestWorkload/ProdMinBias")
    self.assertEqual(task.taskType(), "Production", "Wrong task type")

    splitParams = task.jobSplittingParameters()
    self.assertEqual(splitParams['algorithm'], "EventBased", "Wrong job splitting algo")
    self.assertEqual(splitParams['events_per_job'], 150)
    self.assertEqual(splitParams['events_per_lumi'], 50)
    self.assertFalse(splitParams['lheInputFiles'], "Wrong LHE flag")
    self.assertTrue(splitParams['performance']['timePerEvent'] > 4.75)
    self.assertTrue(splitParams['performance']['sizePerEvent'] > 1233)
    self.assertTrue(splitParams['performance']['memoryRequirement'] == 2400)

    self.assertFalse(task.getTrustSitelists().get('trustlists'), "Wrong input location flag")
    self.assertFalse(task.inputRunWhitelist(), "Wrong run white list")

    # test workload step stuff
    self.assertEqual(sorted(task.listAllStepNames()),
                     ['cmsRun1', 'cmsRun2', 'cmsRun3', 'logArch1', 'stageOut1'])
    self.assertEqual(task.getTopStepName(), 'cmsRun1')
    self.assertEqual(task.getStep("cmsRun1").stepType(), "CMSSW")
    self.assertFalse(task.getInputStep(), "Wrong input step")
    outModsAndDsets = task.listOutputDatasetsAndModules()
    outMods = set([elem['outputModule'] for elem in outModsAndDsets])
    outDsets = [elem['outputDataset'] for elem in outModsAndDsets]
    self.assertEqual(outMods, set(['RAWSIMoutput', 'AODSIMoutput', 'RECOSIMoutput']),
                     "Wrong output modules")
    self.assertTrue('/RelValProdMinBias/CMSSW_7_0_0_pre11-FilterA-START70_V4-v1/GEN-SIM' in outDsets)
    self.assertTrue('/RelValProdMinBias/CMSSW_7_0_0_pre11-FilterD-START70_V4-v1/AODSIM' in outDsets)
    self.assertTrue('/RelValProdMinBias/CMSSW_7_0_0_pre11-FilterC-START70_V4-v1/GEN-SIM-RECO' in outDsets)
    self.assertEqual(task.getSwVersion(), 'CMSSW_7_0_0_pre12')
    self.assertEqual(task.getScramArch(), 'slc5_amd64_gcc481')

    # cmsRun1: first step, no parent/input, keeps its GEN-SIM output
    step = task.getStep("cmsRun1")
    self.assertFalse(step.data.tree.parent)
    self.assertFalse(getattr(step.data.input, 'inputStepName', None))
    self.assertFalse(getattr(step.data.input, 'inputOutputModule', None))
    self.assertEqual(step.data.output.modules.RAWSIMoutput.filterName, 'FilterA')
    self.assertEqual(step.data.output.modules.RAWSIMoutput.dataTier, 'GEN-SIM')
    self.assertTrue(step.data.output.keep)
    self.assertEqual(sorted(step.data.tree.childNames), ['cmsRun2', 'logArch1', 'stageOut1'])
    self.assertEqual(step.data.application.setup.cmsswVersion, 'CMSSW_7_0_0_pre12')
    self.assertEqual(step.data.application.setup.scramArch, 'slc5_amd64_gcc481')
    self.assertEqual(step.data.application.configuration.arguments.globalTag, 'START70_V4::All')

    # cmsRun2: chained off cmsRun1's RAWSIMoutput, output dropped (KeepOutput False)
    step = task.getStep("cmsRun2")
    self.assertEqual(step.data.tree.parent, "cmsRun1")
    self.assertEqual(step.data.input.inputStepName, 'cmsRun1')
    self.assertEqual(step.data.input.inputOutputModule, 'RAWSIMoutput')
    self.assertEqual(step.data.output.modules.RAWSIMoutput.filterName, 'FilterB')
    self.assertEqual(step.data.output.modules.RAWSIMoutput.dataTier, 'GEN-SIM-RAW')
    self.assertFalse(step.data.output.keep)
    self.assertEqual(step.data.tree.childNames, ["cmsRun3"])
    self.assertEqual(step.data.application.setup.cmsswVersion, 'CMSSW_7_0_0_pre11')
    self.assertEqual(step.data.application.setup.scramArch, 'slc5_amd64_gcc481')
    self.assertEqual(step.data.application.configuration.arguments.globalTag, 'START70_V4::All')

    # cmsRun3: last step, two kept output modules (RECO and AOD)
    step = task.getStep("cmsRun3")
    self.assertEqual(step.data.tree.parent, "cmsRun2")
    self.assertEqual(step.data.input.inputStepName, 'cmsRun2')
    self.assertEqual(step.data.input.inputOutputModule, 'RAWSIMoutput')
    self.assertEqual(step.data.output.modules.RECOSIMoutput.filterName, 'FilterC')
    self.assertEqual(step.data.output.modules.AODSIMoutput.filterName, 'FilterD')
    self.assertEqual(step.data.output.modules.RECOSIMoutput.dataTier, 'GEN-SIM-RECO')
    self.assertEqual(step.data.output.modules.AODSIMoutput.dataTier, 'AODSIM')
    self.assertTrue(step.data.output.keep)
    self.assertFalse(step.data.tree.childNames)
    self.assertEqual(step.data.application.setup.cmsswVersion, 'CMSSW_7_0_0_pre11')
    self.assertEqual(step.data.application.setup.scramArch, 'slc5_amd64_gcc481')
    self.assertEqual(step.data.application.configuration.arguments.globalTag, 'START70_V4::All')

    return
def testStepChainSingleStep(self):
    """
    Build a StepChain single step, reading AODSIM and producing MINIAODSIM

    Constructs the workload, installs it into WMBS and checks the
    workload, task, splitting and cmsRun1 step properties.
    """
    testArguments = StepChainWorkloadFactory.getTestArguments()
    request = {
        "Campaign": "TaskForceUnitTest",
        "CMSSWVersion": "CMSSW_7_5_0",
        "ScramArch": "slc6_amd64_gcc491",
        "DbsUrl": "https://cmsweb.cern.ch/dbs/prod/global/DBSReader",
        "GlobalTag": "PHYS14_25_V3",
        "AcquisitionEra": "SingleStep",
        "ProcessingString": "UnitTest_StepChain",
        "ProcessingVersion": 3,
        "PrepID": "MainStep",
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "stepchain_t",
        "Memory": 3500,
        "SizePerEvent": 2600,
        "TimePerEvent": 26.5,
        "Step1": {
            "ConfigCacheID": injectStepChainConfigSingle(self.configDatabase),
            # step-level GlobalTag overrides the top-level one
            "GlobalTag": "PHYS14_25_V44",
            "InputDataset": "/RSGravToGG_kMpl-01_M-5000_TuneCUEP8M1_13TeV-pythia8/RunIISpring15DR74-Asympt50ns_MCRUN2_74_V9A-v1/AODSIM",
            "SplittingAlgo": "EventAwareLumiBased",
            "EventsPerJob": 500,
            "StepName": "StepMini"
        },
        "StepChain": 1
    }
    testArguments.update(request)

    factory = StepChainWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

    testWMBSHelper = WMBSHelper(testWorkload, "StepMini", "GravWhatever",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # test workload properties
    self.assertEqual(testWorkload.getDashboardActivity(), "processing")
    self.assertEqual(testWorkload.getCampaign(), "TaskForceUnitTest")
    self.assertEqual(testWorkload.getAcquisitionEra(), "SingleStep")
    self.assertEqual(testWorkload.getProcessingString(), "UnitTest_StepChain")
    self.assertEqual(testWorkload.getProcessingVersion(), 3)
    self.assertEqual(testWorkload.getPrepID(), "MainStep")
    self.assertEqual(sorted(testWorkload.getCMSSWVersions()), ['CMSSW_7_5_0'])
    self.assertEqual(testWorkload.data.policies.start.policyName, "Block")

    # test workload tasks and steps
    tasks = testWorkload.listAllTaskNames()
    self.assertEqual(len(tasks), 4)
    self.assertTrue('StepMiniMergeMINIAODSIMoutput' in tasks)

    task = testWorkload.getTask(tasks[0])
    self.assertEqual(task.taskType(), "Processing", "Wrong task type")

    splitParams = task.jobSplittingParameters()
    self.assertEqual(splitParams['algorithm'], "EventAwareLumiBased", "Wrong job splitting algo")
    self.assertEqual(splitParams['events_per_job'], 500)
    self.assertTrue(splitParams['performance']['timePerEvent'] > 26.4)
    self.assertTrue(splitParams['performance']['sizePerEvent'] > 2599)
    self.assertTrue(splitParams['performance']['memoryRequirement'] == 3500)

    # test workload step stuff
    self.assertEqual(sorted(task.listAllStepNames()), ['cmsRun1', 'logArch1', 'stageOut1'])
    self.assertEqual(task.getTopStepName(), 'cmsRun1')
    self.assertEqual(task.getStep("cmsRun1").stepType(), "CMSSW")
    self.assertFalse(task.getInputStep(), "Wrong input step")
    outModsAndDsets = task.listOutputDatasetsAndModules()[0]
    self.assertEqual(outModsAndDsets['outputModule'], 'MINIAODSIMoutput')
    self.assertEqual(
        outModsAndDsets['outputDataset'],
        '/RSGravToGG_kMpl-01_M-5000_TuneCUEP8M1_13TeV-pythia8/SingleStep-UnitTest_StepChain-v3/MINIAODSIM'
    )
    self.assertEqual(task.getSwVersion(), 'CMSSW_7_5_0')
    self.assertEqual(task.getScramArch(), 'slc6_amd64_gcc491')

    # the step must carry the step-level GlobalTag, not the top-level one
    step = task.getStep("cmsRun1")
    self.assertEqual(step.data.application.configuration.arguments.globalTag, 'PHYS14_25_V44')
    return
} } factory = TaskChainWorkloadFactory() try: self.workload = factory("YankingTheChain", arguments) except Exception, ex: msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex) self.fail(msg) self.workload.setSpecUrl("somespec") self.workload.setOwnerDetails("*****@*****.**", "DMWM") testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath = self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1'], arguments) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"), arguments['Task2'], arguments) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"), arguments['Task3'], arguments) self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"), arguments['Task4'], arguments) digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT") digiStep = digi.getStepHelper("cmsRun1") self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag']) self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion'])
def testFilesets(self):
    """
    Test workflow tasks, filesets and subscriptions creation

    Installs the same MonteCarlo workload twice with different masks and
    verifies tasks, filesets (whose names are md5 digests of the mask)
    and subscriptions after each installation.
    """
    # expected tasks, filesets, subscriptions, etc
    expOutTasks = ['/TestWorkload/Production',
                   '/TestWorkload/Production/ProductionMergeOutputB',
                   '/TestWorkload/Production/ProductionMergeOutputA']
    expWfTasks = ['/TestWorkload/Production',
                  '/TestWorkload/Production/LogCollect',
                  '/TestWorkload/Production/ProductionCleanupUnmergedOutputA',
                  '/TestWorkload/Production/ProductionCleanupUnmergedOutputB',
                  '/TestWorkload/Production/ProductionMergeOutputA',
                  '/TestWorkload/Production/ProductionMergeOutputA/ProductionOutputAMergeLogCollect',
                  '/TestWorkload/Production/ProductionMergeOutputB',
                  '/TestWorkload/Production/ProductionMergeOutputB/ProductionOutputBMergeLogCollect']
    # first entry is a placeholder patched below once the mask hash is known
    expFsets = ['FILESET_DEFINED_DURING_RUNTIME',
                '/TestWorkload/Production/unmerged-OutputBUSER',
                '/TestWorkload/Production/ProductionMergeOutputA/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputA/merged-MergedRECO',
                '/TestWorkload/Production/ProductionMergeOutputB/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputB/merged-MergedUSER',
                '/TestWorkload/Production/unmerged-logArchive',
                '/TestWorkload/Production/unmerged-OutputARECO']
    subMaps = ['FILESET_DEFINED_DURING_RUNTIME',
               (6,
                '/TestWorkload/Production/ProductionMergeOutputA/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputA/ProductionOutputAMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (3,
                '/TestWorkload/Production/ProductionMergeOutputB/merged-logArchive',
                '/TestWorkload/Production/ProductionMergeOutputB/ProductionOutputBMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (8,
                '/TestWorkload/Production/unmerged-logArchive',
                '/TestWorkload/Production/LogCollect',
                'MinFileBased',
                'LogCollect'),
               (7,
                '/TestWorkload/Production/unmerged-OutputARECO',
                '/TestWorkload/Production/ProductionCleanupUnmergedOutputA',
                'SiblingProcessingBased',
                'Cleanup'),
               (5,
                '/TestWorkload/Production/unmerged-OutputARECO',
                '/TestWorkload/Production/ProductionMergeOutputA',
                'ParentlessMergeBySize',
                'Merge'),
               (4,
                '/TestWorkload/Production/unmerged-OutputBUSER',
                '/TestWorkload/Production/ProductionCleanupUnmergedOutputB',
                'SiblingProcessingBased',
                'Cleanup'),
               (2,
                '/TestWorkload/Production/unmerged-OutputBUSER',
                '/TestWorkload/Production/ProductionMergeOutputB',
                'ParentlessMergeBySize',
                'Merge')]

    testArguments = MonteCarloWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = TEST_DB_NAME
    testArguments["ConfigCacheID"] = self.injectMonteCarloConfig()

    factory = MonteCarloWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

    myMask = Mask(FirstRun=1, FirstLumi=1, FirstEvent=1,
                  LastRun=1, LastLumi=10, LastEvent=1000)
    testWMBSHelper = WMBSHelper(testWorkload, "Production", mask=myMask,
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # same function as in WMBSHelper, otherwise we cannot know which fileset name is
    maskString = ",".join(["%s=%s" % (x, myMask[x]) for x in sorted(myMask)])
    topFilesetName = 'TestWorkload-Production-%s' % md5(maskString).hexdigest()
    expFsets[0] = topFilesetName
    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subMaps[0] = (1,
                  topFilesetName,
                  '/TestWorkload/Production',
                  'EventBased',
                  'Production')
    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    self.assertItemsEqual(subscriptions, subMaps)

    ### create another top level subscription
    myMask = Mask(FirstRun=1, FirstLumi=11, FirstEvent=1001,
                  LastRun=1, LastLumi=20, LastEvent=2000)
    testWMBSHelper = WMBSHelper(testWorkload, "Production", mask=myMask,
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # existing tasks are reused; only a new top level fileset/subscription appears
    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # same function as in WMBSHelper, otherwise we cannot know which fileset name is
    maskString = ",".join(["%s=%s" % (x, myMask[x]) for x in sorted(myMask)])
    topFilesetName = 'TestWorkload-Production-%s' % md5(maskString).hexdigest()
    expFsets.append(topFilesetName)
    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subMaps.append((9,
                    topFilesetName,
                    '/TestWorkload/Production',
                    'EventBased',
                    'Production'))
    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    self.assertItemsEqual(subscriptions, subMaps)
def testFilesets(self):
    """
    Build a ReReco workload (with one skim and DQM harvesting enabled),
    inject it into WMBS and verify that the expected tasks, filesets and
    subscriptions were created, including their split algorithms and types.
    """
    # Expected tasks, filesets, subscriptions, etc. used as references below.

    # Tasks that produce output datasets.
    expOutTasks = ['/TestWorkload/DataProcessing',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
                   '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput']
    # Every workflow task expected in WMBS (processing, merge, cleanup,
    # harvesting and logcollect tasks).
    expWfTasks = ['/TestWorkload/DataProcessing',
                  '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput',
                  '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect',
                  '/TestWorkload/DataProcessing/LogCollect']
    # Filesets expected in WMBS; the first entry is the input-block fileset.
    expFsets = ['TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/DataProcessing/unmerged-RECOoutput',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-Merged',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-Merged',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive',
                '/TestWorkload/DataProcessing/unmerged-DQMoutput',
                '/TestWorkload/DataProcessing/unmerged-logArchive']
    # Expected subscriptions as (id, fileset, workflow task, split algo, type).
    subMaps = [(15,
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect',
                'MinFileBased',
                'LogCollect'),
               (16,
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (14,
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged',
                'Harvest',
                'Harvesting'),
               (11,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (3,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
                'FileBased',
                'Skim'),
               (5,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (8,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (10,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect',
                'MinFileBased',
                'LogCollect'),
               (6,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA',
                'SiblingProcessingBased',
                'Cleanup'),
               (4,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
                'ParentlessMergeBySize',
                'Merge'),
               (9,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB',
                'SiblingProcessingBased',
                'Cleanup'),
               (7,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
                'ParentlessMergeBySize',
                'Merge'),
               (17,
                '/TestWorkload/DataProcessing/unmerged-DQMoutput',
                '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput',
                'SiblingProcessingBased',
                'Cleanup'),
               (13,
                '/TestWorkload/DataProcessing/unmerged-DQMoutput',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput',
                'ParentlessMergeBySize',
                'Merge'),
               (18,
                '/TestWorkload/DataProcessing/unmerged-logArchive',
                '/TestWorkload/DataProcessing/LogCollect',
                'MinFileBased',
                'LogCollect'),
               (12,
                '/TestWorkload/DataProcessing/unmerged-RECOoutput',
                '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput',
                'SiblingProcessingBased',
                'Cleanup'),
               (2,
                '/TestWorkload/DataProcessing/unmerged-RECOoutput',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
                'ParentlessMergeBySize',
                'Merge'),
               (1,
                'TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/DataProcessing',
                'EventAwareLumiBased',
                'Processing')]

    # Build a ReReco workload with one skim and DQM harvesting enabled.
    testArguments = ReRecoWorkloadFactory.getTestArguments()
    testArguments["ConfigCacheID"] = self.injectReRecoConfig()
    testArguments.update({
        "SkimName1": "SomeSkim",
        "SkimInput1": "RECOoutput",
        "Skim1ConfigCacheID": self.injectSkimConfig()
    })
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = "rereco_t"
    testArguments["EnableHarvesting"] = True
    testArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction(
        "TestWorkload", testArguments)

    # Inject the workload into WMBS: top-level fileset plus all subscriptions.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing",
                                blockName=testArguments['InputDataset'],
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(
        testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Order-insensitive comparison of what landed in WMBS vs. expectations.
    print("Tasks producing output:\n%s" % pformat(testWorkload.listOutputProducingTasks()))
    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    print("List of workflow tasks:\n%s" % pformat([item['task'] for item in workflows]))
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # listFilesets returns a tuple of id, name, open and last_update.
    filesets = self.listFilesets.execute()
    print("List of filesets:\n%s" % pformat([item[1] for item in filesets]))
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    print("List of subscriptions:\n%s" % pformat(subscriptions))
    self.assertItemsEqual(subscriptions, subMaps)
def testPromptRecoWithSkims(self):
    """
    _testPromptRecoWithSkims_

    Create a T1 Prompt Reconstruction workflow with PromptSkims and
    verify it installs into WMBS correctly: output filesets of the Reco,
    AlcaSkim and PromptSkim workflows, the merge/cleanup/logcollect
    workflows, and the type and split algorithm of every subscription.

    Fixes over the previous revision:
    - the TestSkim1 cleanup loop now uses its own loop variable
      (goldenOutputMod) instead of the stale unmergedOutput left over
      from the preceding loop, so each cleanup workflow is checked;
    - the three merge-LogCollect loops now assert on the subscription
      they just created (logCollectSubscription) instead of the stale
      logCollectSub from an earlier section.
    """
    self.setupPromptSkimConfigObject()
    testArguments = getTestArguments()
    testArguments["PromptSkims"] = [self.promptSkim]
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = "promptreco_t"
    testArguments["EnvPath"] = os.environ.get("EnvPath", None)
    testArguments["BinPath"] = os.environ.get("BinPath", None)

    testWorkload = promptrecoWorkload("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Inject the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Reco workflow: one output per write tier plus logArchive.
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()),
                     len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # write_ALCARECO feeds the AlcaSkim task, so it has no Reco merge output.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim workflow: one output per ALCARECO stream plus logArchive.
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # PromptSkim workflow: five fake skim outputs plus logArchive.
    promptSkimWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1")
    promptSkimWorkflow.load()
    self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Merge workflows below Reco (Merged fileset + logArchive each).
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Merge workflows below AlcaSkim.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Merge workflows below the prompt skim.
    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level (input block) subscription.
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # AlcaSkim runs off the unmerged ALCARECO output of Reco.
    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # The prompt skim runs off the merged RECO output.
    mergedRecoFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
    mergedRecoFileset.loadData()
    promptSkimSubscription = Subscription(fileset=mergedRecoFileset, workflow=promptSkimWorkflow)
    promptSkimSubscription.loadData()
    self.assertEqual(promptSkimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(promptSkimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algorithm. %s" % promptSkimSubscription["split_algo"])

    # Merge subscriptions for the Reco write tiers.
    unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
    for unmergedOutput in unmergedOutputs:
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions for the ALCARECO streams.
    unmergedOutputs = []
    for alcaProd in testArguments["AlcaSkims"]:
        unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions for the prompt skim outputs.
    unmergedOutputs = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                       "fakeSkimOut4", "fakeSkimOut5"]
    for unmergedOutput in unmergedOutputs:
        unmergedPromptSkim = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput)
        unmergedPromptSkim.loadData()
        promptSkimMergeWorkflow = Workflow(name="TestWorkload",
                                           task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % unmergedOutput)
        promptSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedPromptSkim, workflow=promptSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions for the unmerged Reco outputs.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # Cleanup subscriptions for the unmerged ALCARECO streams.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # Cleanup subscriptions for the unmerged prompt skim outputs.
    # Bug fix: iterate with goldenOutputMod instead of reusing the stale
    # unmergedOutput variable from the previous section.
    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm. %s" % cleanupSubscription["split_algo"])

    # LogCollect subscriptions for the processing tasks.
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    promptSkimLogCollect = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive")
    promptSkimLogCollect.loadData()
    promptSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect")
    promptSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=promptSkimLogCollect, workflow=promptSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the Reco merge tasks.
    # Bug fix: assert on the subscription created in this loop instead of
    # the stale logCollectSub from the section above.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                              workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the AlcaSkim merge tasks.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                              workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the prompt skim merge tasks.
    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        promptSkimMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod)
        promptSkimMergeLogCollect.loadData()
        promptSkimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                     task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        promptSkimMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=promptSkimMergeLogCollect,
                                              workflow=promptSkimMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")
    return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Verify that the subscription creation code works correctly: workflow
    metadata, output filesets, the site white/black list, and the type
    and split algorithm of the processing, merge and skim subscriptions.
    """
    # Register two sites; the test spec white-lists site1 only.
    resControl = ResourceControl()
    resControl.insertSite(siteName='site1', seName='goodse.cern.ch',
                          ceName='site1', plugin="TestPlugin")
    resControl.insertSite(siteName='site2', seName='goodse2.cern.ch',
                          ceName='site2', plugin="TestPlugin")

    # Build the test spec and inject it into WMBS.
    workload = self.createTestWMSpec()
    topTask = getFirstTask(workload)
    helper = WMBSHelper(workload, topTask.name(), "SomeBlock", cachepath=self.workDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(topTask, helper.topLevelFileset)

    def loadWorkflow(taskPath):
        # Fetch the WMBS workflow registered for the given task path.
        wf = Workflow(name="TestWorkload", task=taskPath)
        wf.load()
        return wf

    # Processing workflow metadata and outputs.
    procWorkflow = loadWorkflow("/TestWorkload/ProcessingTask")
    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.")
    self.assertEqual(procWorkflow.spec,
                     os.path.join(self.workDir, procWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
    unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()
    self.assertEqual(mergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # Merge workflow: one output.
    mergeWorkflow = loadWorkflow("/TestWorkload/ProcessingTask/MergeTask")
    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec,
                     os.path.join(self.workDir, mergeWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # Cleanup workflow: no outputs.
    cleanupWorkflow = loadWorkflow("/TestWorkload/ProcessingTask/CleanupTask")
    self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec,
                     os.path.join(self.workDir, cleanupWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # Skim workflow: two outputs, both unmerged in either slot.
    skimWorkflow = loadWorkflow("/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec,
                     os.path.join(self.workDir, skimWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    for fset in (mergedSkimOutputA, mergedSkimOutputB,
                 unmergedSkimOutputA, unmergedSkimOutputB):
        fset.loadData()
    self.assertEqual(mergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # Top-level processing subscription: sites from ResourceControl apply.
    topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for site in procSubscription.getWhiteBlackList():
        expected, msg = ((1, "Error: Site should be white listed.")
                         if site["site_name"] == "site1"
                         else (0, "Error: Site should be black listed."))
        self.assertEqual(site["valid"], expected, msg)
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscription: no site list, merge-by-size splitting.
    mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    # Skim subscription off the merged output.
    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Install a full workload into WMBS, truncate it at the merge task (as a
    resubmission would), inject the truncated workload for a second block,
    and verify the resulting workflows, filesets and subscriptions.
    """
    # Register two sites so subscriptions can resolve white/black lists.
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch',
                               ceName='site2', plugin="TestPlugin")

    # Inject the original (untruncated) workload into WMBS first.
    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock",
                                cachepath=self.workDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Truncate at the merge task; the surviving tasks become top-level
    # tasks of the new "ResubmitTestWorkload" workload.
    testWorkload.truncate("ResubmitTestWorkload",
                          "/TestWorkload/ProcessingTask/MergeTask",
                          "someserver", "somedatabase")

    # create the subscription for multiple top task (MergeTask and CleanupTask for the same block)
    for task in testWorkload.getTopLevelTask():
        testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2",
                                            cachepath=self.workDir)
        testResubmitWMBSHelper.createTopLevelFileset()
        testResubmitWMBSHelper._createSubscriptionsInWMBS(task,
                                                          testResubmitWMBSHelper.topLevelFileset)

    # The merge workflow is now top-level in the truncated workload.
    mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                             task="/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()

    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(
        mergeWorkflow.spec,
        os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                     "WMWorkload.pkl"), "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = Workflow(name="ResubmitTestWorkload",
                            task="/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(
        skimWorkflow.spec,
        os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                     "WMWorkload.pkl"), "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # For each skim output module, the merged and unmerged filesets of the
    # truncated workload both resolve to the unmerged fileset name.
    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()

    self.assertEqual(
        mergedSkimOutputA.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(
        unmergedSkimOutputA.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Unmerged output fileset is wrong.")
    self.assertEqual(
        mergedSkimOutputB.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Merged output fileset is wrong.")
    self.assertEqual(
        unmergedSkimOutputB.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Unmerged output fileset is wrong.")

    # Top-level fileset for the resubmitted block feeds the merge sub.
    topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()

    mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testDependentReDigi(self):
    """
    _testDependentReDigi_

    Verify that a dependent ReDigi workflow that keeps and stages out
    RAW data is created and installed into WMBS correctly.
    """
    defaultArguments = ReDigiWorkloadFactory.getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = injectReDigiConfigs(self.configDatabase)
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput"
    defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput"

    factory = ReDigiWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

    # Inject the workload into WMBS for one input block.
    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath = self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Load every fileset the three-step workflow should have created;
    # loadData() raises if the fileset does not exist in WMBS.
    topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
    topLevelFileset.loadData()
    stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
    stepOneUnmergedRAWFileset.loadData()
    stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged")
    stepOneMergedRAWFileset.loadData()
    stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
    stepOneLogArchiveFileset.loadData()
    stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive")
    stepOneMergeLogArchiveFileset.loadData()
    stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput")
    stepTwoUnmergedDQMFileset.loadData()
    stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput")
    stepTwoUnmergedRECOFileset.loadData()
    stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged")
    stepTwoMergedDQMFileset.loadData()
    stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged")
    stepTwoMergedRECOFileset.loadData()
    stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive")
    stepTwoLogArchiveFileset.loadData()
    stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive")
    stepTwoMergeDQMLogArchiveFileset.loadData()
    stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive")
    stepTwoMergeRECOLogArchiveFileset.loadData()
    stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule")
    stepThreeUnmergedAODFileset.loadData()
    stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged")
    stepThreeMergedAODFileset.loadData()
    stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive")
    stepThreeLogArchiveFileset.loadData()
    stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive")
    stepThreeMergeLogArchiveFileset.loadData()

    # --- Step one: processing workflow, cleanup, log collect, merge ---
    stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc")
    stepOneWorkflow.load()
    self.assertEqual(stepOneWorkflow.wfType, 'reprocessing')
    self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(),
                    "Error: Step one missing output module.")
    self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(),
                    "Error: Step one missing output module.")
    # logArchive is never merged, so merged and unmerged ids coincide.
    self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepOneLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepOneLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id,
                     stepOneMergedRAWFileset.id, "Error: RAWDEBUG output fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id,
                     stepOneUnmergedRAWFileset.id, "Error: RAWDEBUG output fileset is wrong.")

    for outputMod in stepOneWorkflow.outputMap.keys():
        self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset)
    stepOneSub.loadData()
    self.assertEqual(stepOneSub["type"], "Processing",
                     "Error: Step one sub has wrong type.")

    stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                      task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput")
    stepOneCleanupWorkflow.load()
    self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup should have no output.")
    stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow,
                                     fileset = stepOneUnmergedRAWFileset)
    stepOneCleanupSub.loadData()
    self.assertEqual(stepOneCleanupSub["type"], "Cleanup",
                     "Error: Step one sub has wrong type.")

    stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/LogCollect")
    stepOneLogCollectWorkflow.load()
    self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect should have no output.")
    stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow,
                                        fileset = stepOneLogArchiveFileset)
    stepOneLogCollectSub.loadData()
    self.assertEqual(stepOneLogCollectSub["type"], "LogCollect",
                     "Error: Step one sub has wrong type.")

    stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                    task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput")
    stepOneMergeWorkflow.load()
    self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(),
                    "Error: Step one merge missing output module.")
    self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(),
                    "Error: Step one merge missing output module.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepOneMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepOneMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepOneMergedRAWFileset.id, "Error: RAWDEBUG merge output fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepOneMergedRAWFileset.id, "Error: RAWDEBUG merge output fileset is wrong.")
    for outputMod in stepOneMergeWorkflow.outputMap.keys():
        self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow,
                                   fileset = stepOneUnmergedRAWFileset)
    stepOneMergeSub.loadData()
    self.assertEqual(stepOneMergeSub["type"], "Merge",
                     "Error: Step one sub has wrong type.")

    # --- Step two: runs over the step-one merged RAW output ---
    stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc")
    stepTwoWorkflow.load()
    self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id, "Error: DQM output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedDQMFileset.id, "Error: DQM output fileset is wrong.")
    stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset)
    stepTwoSub.loadData()
    self.assertEqual(stepTwoSub["type"], "Processing",
                     "Error: Step two sub has wrong type.")

    for outputMod in stepTwoWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput")
    stepTwoCleanupDQMWorkflow.load()
    self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow,
                                        fileset = stepTwoUnmergedDQMFileset)
    stepTwoCleanupDQMSub.loadData()
    self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput")
    stepTwoCleanupRECOWorkflow.load()
    self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow,
                                         fileset = stepTwoUnmergedRECOFileset)
    stepTwoCleanupRECOSub.loadData()
    self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect")
    stepTwoLogCollectWorkflow.load()
    self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect shouldn't have any output.")
    stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow,
                                        fileset = stepTwoLogArchiveFileset)
    stepTwoLogCollectSub.loadData()
    self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                     "Error: Step two sub has wrong type.")

    stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput")
    stepTwoMergeRECOWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.")
    stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow,
                                       fileset = stepTwoUnmergedRECOFileset)
    stepTwoMergeRECOSub.loadData()
    self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                       task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput")
    stepTwoMergeDQMWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.")
    stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow,
                                      fileset = stepTwoUnmergedDQMFileset)
    stepTwoMergeDQMSub.loadData()
    self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # --- Step three: runs over the step-two merged RECODEBUG output ---
    stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                 task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc")
    stepThreeWorkflow.load()
    self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(),
                    "Error: Step three missing output module.")
    self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(),
                    "Error: Step three missing output module.")
    self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepThreeLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepThreeLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    # NOTE(review): these two messages say "RECODEBUG" but the checks are
    # on the AOD output module; the assertions themselves are correct.
    self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id,
                     stepThreeMergedAODFileset.id, "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id,
                     stepThreeUnmergedAODFileset.id, "Error: RECODEBUG output fileset is wrong.")
    stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset)
    stepThreeSub.loadData()
    self.assertEqual(stepThreeSub["type"], "Processing",
                     "Error: Step three sub has wrong type.")

    for outputMod in stepThreeWorkflow.outputMap.keys():
        self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule")
    stepThreeCleanupWorkflow.load()
    self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup should have no output.")
    stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow,
                                       fileset = stepThreeUnmergedAODFileset)
    stepThreeCleanupSub.loadData()
    self.assertEqual(stepThreeCleanupSub["type"], "Cleanup",
                     "Error: Step three sub has wrong type.")

    stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect")
    stepThreeLogCollectWorkflow.load()
    self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect should have no output.")
    stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow,
                                          fileset = stepThreeLogArchiveFileset)
    stepThreeLogCollectSub.loadData()
    self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect",
                     "Error: Step three sub has wrong type.")

    stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                      task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule")
    stepThreeMergeWorkflow.load()
    self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(),
                    "Error: Step three merge missing output module.")
    self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(),
                    "Error: Step three merge missing output module.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepThreeMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepThreeMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepThreeMergedAODFileset.id, "Error: AOD merge output fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepThreeMergedAODFileset.id, "Error: AOD merge output fileset is wrong.")
    stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow,
                                     fileset = stepThreeUnmergedAODFileset)
    stepThreeMergeSub.loadData()
    self.assertEqual(stepThreeMergeSub["type"], "Merge",
                     "Error: Step three sub has wrong type.")
    for outputMod in stepThreeMergeWorkflow.outputMap.keys():
        self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    return
def testRepack(self):
    """
    _testRepack_

    Create a Repack workflow and verify it installs into WMBS correctly:
    the Repack workflow itself, one merge/cleanup/log-collect workflow per
    output module, and the subscriptions tying them to the right filesets.
    """
    testArguments = RepackWorkloadFactory.getTestArguments()
    # REQUEST is a shared module-level fixture; deep-copy so nothing this
    # test mutates can leak into other test cases.
    testArguments.update(deepcopy(REQUEST))

    factory = RepackWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Inject the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Repack", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    repackWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Repack")
    repackWorkflow.load()
    # One output-map entry per configured output module plus logArchive.
    self.assertEqual(len(repackWorkflow.outputMap),
                     len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Repack WF.")

    goldenOutputMods = {"write_PrimaryDataset1_RAW": "RAW",
                        "write_PrimaryDataset2_RAW": "RAW"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        if goldenOutputMod != "write_PrimaryDataset1_RAW":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Repack/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive output of the Repack task is never merged, so both the
    # merged and unmerged filesets share the unmerged name.
    logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Each output module gets its own merge workflow with Merged<tier>
    # plus MergedError<tier> plus logArchive outputs (3 in total).
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 3,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level subscription: Repack type with the Repack split algorithm.
    topLevelFileset = Fileset(name="TestWorkload-Repack")
    topLevelFileset.loadData()
    repackSubscription = Subscription(fileset=topLevelFileset, workflow=repackWorkflow)
    repackSubscription.loadData()
    self.assertEqual(repackSubscription["type"], "Repack",
                     "Error: Wrong subscription type.")
    self.assertEqual(repackSubscription["split_algo"], "Repack",
                     "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])

    # Merge subscriptions on each unmerged data-tier fileset.
    unmergedOutputs = {"write_PrimaryDataset1_RAW": "RAW",
                       "write_PrimaryDataset2_RAW": "RAW"}
    for unmergedOutput, tier in viewitems(unmergedOutputs):
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier,
                                         workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on each unmerged fileset.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # LogCollect subscription for the top-level Repack task.
    repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
    repackLogCollect.loadData()
    repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                        task="/TestWorkload/Repack/LogCollect")
    repackLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=repackLogCollect,
                                 workflow=repackLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscriptions for each merge task.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        repackMergeLogCollect = Fileset(
            name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
        repackMergeLogCollect.loadData()
        repackMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" %
                 (goldenOutputMod, goldenOutputMod))
        repackMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=repackMergeLogCollect,
                                              workflow=repackMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Bug fix: these assertions previously re-checked the stale
        # `logCollectSub` from the top-level LogCollect block above, so
        # the per-merge-task subscriptions were never actually verified.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")
    return
def testDataProcessing(self):
    """
    _testDataProcessing_

    Create a data processing workflow and verify it installs into WMBS
    correctly. Check that we can drop an output module.

    "RECOoutput" is declared transient, so the test verifies that no merge
    task/workflow is created for it (its "merged" fileset is the unmerged
    one), while "ALCARECOoutput" gets the full merge/cleanup/logcollect
    chain.
    """
    testArgs = getTestArguments()
    # Mark RECOoutput as transient: it should not get a merge task.
    testArgs['TransientOutputModules'] = ['RECOoutput']
    testWorkload = dataProcessingWorkload("TestWorkload", testArgs)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    # Inject the workload into WMBS: top-level fileset + subscriptions.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing")
    procWorkflow.load()

    # Two data outputs plus logArchive.
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["RECOoutput", "ALCARECOoutput"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        if goldenOutputMod in testArgs["TransientOutputModules"]:
            # Transient module: "merged" fileset points at the unmerged one.
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        else:
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    # logArchive is never merged: both map entries are the unmerged fileset.
    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Check each merge workflow's output filesets (skip transient modules).
    for goldenOutputMod in goldenOutputMods:
        if goldenOutputMod in testArgs["TransientOutputModules"]:
            # No merge for this output module
            continue
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level processing subscription on the input block fileset.
    topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()

    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")

    unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    # No merge workflow should exist in WMBS
    self.assertRaises(IndexError, recoMergeWorkflow.load)

    # ALCARECOoutput is not transient, so its merge subscription must exist.
    unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
    unmergedAlca.loadData()
    alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
    alcaMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # Cleanup subscriptions exist for both unmerged outputs (even transient).
    for procOutput in ["RECOoutput", "ALCARECOoutput"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscription for the processing task's logArchive output.
    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription for the ALCARECO merge task's logArchive.
    procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testFilesets(self):
    """
    Test workflow tasks, filesets and subscriptions creation

    Builds a Repack workload, injects it into WMBS, then compares the
    full set of output-producing tasks, workflow tasks, filesets and
    subscription mappings against the expected golden lists below.
    Comparisons use assertItemsEqual, so list order is not significant.
    """
    # expected tasks, filesets, subscriptions, etc
    expOutTasks = ['/TestWorkload/Repack',
                   '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                   '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW']
    expWfTasks = ['/TestWorkload/Repack',
                  '/TestWorkload/Repack/LogCollect',
                  '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset1_RAW',
                  '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset2_RAW',
                  '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                  '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/Repackwrite_PrimaryDataset1_RAWMergeLogCollect',
                  '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW',
                  '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/Repackwrite_PrimaryDataset2_RAWMergeLogCollect']
    expFsets = ['TestWorkload-Repack-StreamerFiles',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-logArchive',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-MergedRAW',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-MergedErrorRAW',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-logArchive',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-MergedRAW',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-MergedErrorRAW',
                '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                '/TestWorkload/Repack/unmerged-logArchive']
    # (subscription id, fileset name, workflow task, split algo, sub type)
    subMaps = [(3,
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-logArchive',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/Repackwrite_PrimaryDataset1_RAWMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (6,
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-logArchive',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/Repackwrite_PrimaryDataset2_RAWMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (8,
                '/TestWorkload/Repack/unmerged-logArchive',
                '/TestWorkload/Repack/LogCollect',
                'MinFileBased',
                'LogCollect'),
               (4,
                '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset1_RAW',
                'SiblingProcessingBased',
                'Cleanup'),
               (2,
                '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                'RepackMerge',
                'Merge'),
               (7,
                '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset2_RAW',
                'SiblingProcessingBased',
                'Cleanup'),
               (5,
                '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW',
                'RepackMerge',
                'Merge'),
               (1,
                'TestWorkload-Repack-StreamerFiles',
                '/TestWorkload/Repack',
                'Repack',
                'Repack')]

    # Build the Repack workload from the factory's default test arguments
    # plus the module-level REQUEST fixture (deep-copied so the template
    # is not mutated across tests).
    testArguments = RepackWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))

    factory = RepackWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                       testArguments)

    # Inject into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Repack", blockName='StreamerFiles',
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload",
                                                 returnTuple=True)
    self.assertItemsEqual(subscriptions, subMaps)
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly. The ReReco workflow is just a DataProcessing workflow with
    skims tacked on. This tests run on unmerged RECO output

    RECOoutput is declared transient, so the skim ("SomeSkim") is fed from
    the unmerged RECO fileset rather than a merged one; the test then walks
    the skim's merge/cleanup/logcollect chain in WMBS.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    # One skim, reading the (transient) RECOoutput module.
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                       dataProcArguments)

    # Sanity-check the merged LFN base the spec computed for SkimB.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
                     SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                     Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    # Inject into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()

    # Two skim outputs plus logArchive.
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive is never merged: both map entries are the unmerged fileset.
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Check each skim-merge workflow's output filesets.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim is subscribed to the UNMERGED RECO fileset (RECOoutput is
    # transient, so there is no merged RECO fileset to read from).
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions for both skim outputs.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions for both unmerged skim outputs.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions for both skim-merge logArchive filesets.
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def testMultipleGlobalTags(self):
    """
    _testMultipleGlobalTags_

    Test creating a workload that starts in a processing task
    with an input dataset, and has different globalTags
    and CMSSW versions (with corresponding scramArch) in each task

    Task2 (Reco) also overrides the lumi mask and primary dataset; Task3
    (ALCAReco) and Task4 (Skims) inherit the top-level values they do not
    override. The test then verifies per-task global tag / CMSSW version /
    scramArch / lumi mask, and the full list of output datasets.
    """
    processorDocs = makeProcessingConfigs(self.configDatabase)
    testArguments = TaskChainWorkloadFactory.getTestArguments()
    # Top-level lumi mask and the Task2-specific override.
    lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
    lumiDict2 = {"1": [[2, 4], [8, 40]], "2": [[100, 150], [210, 210]]}
    arguments = {
        "AcquisitionEra": "ReleaseValidation",
        "Requestor": "*****@*****.**",
        "CMSSWVersion": "CMSSW_3_5_8",
        "ScramArch": "slc5_ia32_gcc434",
        "ProcessingVersion": 1,
        "GlobalTag": "DefaultGlobalTag",
        "LumiList": lumiDict,
        "CouchURL": self.testInit.couchUrl,
        "CouchDBName": self.testInit.couchDbName,
        "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
        "DashboardHost": "127.0.0.1",
        "DashboardPort": 8884,
        "TaskChain": 4,
        "Task1": {
            "TaskName": "DigiHLT",
            "ConfigCacheID": processorDocs['DigiHLT'],
            "InputDataset": "/MinimumBias/Commissioning10-v4/GEN-SIM",
            "SplittingAlgo": "FileBased",
        },
        # Task2 overrides global tag, CMSSW version, scramArch, primary
        # dataset and the lumi mask.
        "Task2": {
            "TaskName": "Reco",
            "InputTask": "DigiHLT",
            "InputFromOutputModule": "writeRAWDIGI",
            "ConfigCacheID": processorDocs['Reco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForReco",
            "CMSSWVersion": "CMSSW_3_1_2",
            "ScramArch": "CompatibleRECOArch",
            "PrimaryDataset": "ZeroBias",
            "LumiList": lumiDict2,
        },
        "Task3": {
            "TaskName": "ALCAReco",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeALCA",
            "ConfigCacheID": processorDocs['ALCAReco'],
            "SplittingAlgo": "FileBased",
            "GlobalTag": "GlobalTagForALCAReco",
            "CMSSWVersion": "CMSSW_3_1_3",
            "ScramArch": "CompatibleALCAArch",
        },
        # Task4 overrides nothing: it must pick up the top-level values.
        "Task4": {
            "TaskName": "Skims",
            "InputTask": "Reco",
            "InputFromOutputModule": "writeRECO",
            "ConfigCacheID": processorDocs['Skims'],
            "SplittingAlgo": "FileBased",
        }
    }
    testArguments.update(arguments)
    arguments = testArguments

    factory = TaskChainWorkloadFactory()
    try:
        self.workload = factory.factoryWorkloadConstruction("YankingTheChain",
                                                            arguments)
    except Exception as ex:
        msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
        self.fail(msg)

    # Inject into WMBS.
    testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock",
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Generic per-task validation for all four chained tasks.
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"),
                    arguments['Task1'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"),
                    arguments['Task2'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"),
                    arguments['Task3'], arguments)
    self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"),
                    arguments['Task4'], arguments)

    # Task1 uses the top-level global tag / CMSSW / scramArch / lumi mask.
    digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
    self.assertEqual(lumiDict, digi.getLumiMask())
    digiStep = digi.getStepHelper("cmsRun1")
    self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])

    # Make sure this task has a different lumilist than the global one
    reco = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco")
    self.assertEqual(lumiDict2, reco.getLumiMask())
    recoStep = reco.getStepHelper("cmsRun1")
    self.assertEqual(recoStep.getGlobalTag(), arguments['Task2']['GlobalTag'])
    self.assertEqual(recoStep.getCMSSWVersion(), arguments['Task2']['CMSSWVersion'])
    self.assertEqual(recoStep.getScramArch(), arguments['Task2']['ScramArch'])

    # Task3 overrides tag/version/arch but inherits the global lumi mask.
    alca = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco")
    self.assertEqual(lumiDict, alca.getLumiMask())
    alcaStep = alca.getStepHelper("cmsRun1")
    self.assertEqual(alcaStep.getGlobalTag(), arguments['Task3']['GlobalTag'])
    self.assertEqual(alcaStep.getCMSSWVersion(), arguments['Task3']['CMSSWVersion'])
    self.assertEqual(alcaStep.getScramArch(), arguments['Task3']['ScramArch'])

    # Task4 overrides nothing, so the top-level values must apply.
    skim = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims")
    skimStep = skim.getStepHelper("cmsRun1")
    self.assertEqual(skimStep.getGlobalTag(), arguments['GlobalTag'])
    self.assertEqual(skimStep.getCMSSWVersion(), arguments['CMSSWVersion'])
    self.assertEqual(skimStep.getScramArch(), arguments['ScramArch'])

    # Verify the output datasets
    outputDatasets = self.workload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 14,
                     "Number of output datasets doesn't match")
    self.assertTrue("/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI" in outputDatasets,
                    "/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI not in output datasets")
    self.assertTrue("/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI" in outputDatasets,
                    "/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI not in output datasets")
    self.assertTrue("/ZeroBias/ReleaseValidation-reco-v1/RECO" in outputDatasets,
                    "/ZeroBias/ReleaseValidation-reco-v1/RECO not in output datasets")
    self.assertTrue("/ZeroBias/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
                    "/ZeroBias/ReleaseValidation-AOD-v1/AOD not in output datasets")
    self.assertTrue("/ZeroBias/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
                    "/ZeroBias/ReleaseValidation-alca-v1/ALCARECO not in output datasets")
    for i in range(1, 5):
        self.assertTrue("/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO" % i in outputDatasets,
                        "/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets" % i)
    for i in range(1, 6):
        self.assertTrue("/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD" % i in outputDatasets,
                        "/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets" % i)
    return
def testFilesets(self):
    """
    Test workflow tasks, filesets and subscriptions creation

    Builds a PromptReco workload (with DQM harvesting enabled), injects it
    into WMBS, then compares output-producing tasks, workflow tasks,
    filesets and subscription mappings against the expected golden lists
    below. Comparisons use assertItemsEqual, so list order is not
    significant.
    """
    # expected tasks, filesets, subscriptions, etc
    expOutTasks = ['/TestWorkload/Reco',
                   '/TestWorkload/Reco/AlcaSkim',
                   '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                   '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                   '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                   '/TestWorkload/Reco/RecoMergewrite_AOD',
                   '/TestWorkload/Reco/RecoMergewrite_DQM',
                   '/TestWorkload/Reco/RecoMergewrite_RECO']
    expWfTasks = ['/TestWorkload/Reco',
                  '/TestWorkload/Reco/AlcaSkim',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamHcalCalHOCosmics',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamMuAlGlobalCosmics',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamTkAlCosmics0T',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/AlcaSkimALCARECOStreamHcalCalHOCosmicsMergeLogCollect',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/AlcaSkimALCARECOStreamMuAlGlobalCosmicsMergeLogCollect',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                  '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/AlcaSkimALCARECOStreamTkAlCosmics0TMergeLogCollect',
                  '/TestWorkload/Reco/LogCollect',
                  '/TestWorkload/Reco/RecoCleanupUnmergedwrite_ALCARECO',
                  '/TestWorkload/Reco/RecoCleanupUnmergedwrite_AOD',
                  '/TestWorkload/Reco/RecoCleanupUnmergedwrite_DQM',
                  '/TestWorkload/Reco/RecoCleanupUnmergedwrite_RECO',
                  '/TestWorkload/Reco/RecoMergewrite_AOD',
                  '/TestWorkload/Reco/RecoMergewrite_AOD/Recowrite_AODMergeLogCollect',
                  '/TestWorkload/Reco/RecoMergewrite_DQM',
                  '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged',
                  '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect',
                  '/TestWorkload/Reco/RecoMergewrite_DQM/Recowrite_DQMMergeLogCollect',
                  '/TestWorkload/Reco/RecoMergewrite_RECO',
                  '/TestWorkload/Reco/RecoMergewrite_RECO/Recowrite_RECOMergeLogCollect']
    expFsets = ['TestWorkload-Reco-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-MergedALCARECO',
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO',
                '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-logArchive',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-logArchive',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-MergedALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-logArchive',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-MergedALCARECO',
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO',
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO',
                '/TestWorkload/Reco/AlcaSkim/unmerged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_AOD/merged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_AOD/merged-MergedAOD',
                '/TestWorkload/Reco/unmerged-write_AODAOD',
                '/TestWorkload/Reco/unmerged-write_DQMDQM',
                '/TestWorkload/Reco/RecoMergewrite_DQM/merged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM',
                '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_RECO/merged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_RECO/merged-MergedRECO',
                '/TestWorkload/Reco/unmerged-logArchive',
                '/TestWorkload/Reco/unmerged-write_RECORECO']
    # (subscription id, fileset name, workflow task, split algo, sub type)
    subMaps = [(4,
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-logArchive',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/AlcaSkimALCARECOStreamHcalCalHOCosmicsMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (10,
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-logArchive',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/AlcaSkimALCARECOStreamMuAlGlobalCosmicsMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (7,
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-logArchive',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/AlcaSkimALCARECOStreamTkAlCosmics0TMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (5,
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamHcalCalHOCosmics',
                'SiblingProcessingBased',
                'Cleanup'),
               (3,
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                'ParentlessMergeBySize',
                'Merge'),
               (11,
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamMuAlGlobalCosmics',
                'SiblingProcessingBased',
                'Cleanup'),
               (9,
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                'ParentlessMergeBySize',
                'Merge'),
               (8,
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamTkAlCosmics0T',
                'SiblingProcessingBased',
                'Cleanup'),
               (6,
                '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                'ParentlessMergeBySize',
                'Merge'),
               (12,
                '/TestWorkload/Reco/AlcaSkim/unmerged-logArchive',
                '/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect',
                'MinFileBased',
                'LogCollect'),
               (15,
                '/TestWorkload/Reco/RecoMergewrite_AOD/merged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_AOD/Recowrite_AODMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (20,
                '/TestWorkload/Reco/RecoMergewrite_DQM/merged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_DQM/Recowrite_DQMMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (18,
                '/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM',
                '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged',
                'Harvest',
                'Harvesting'),
               (19,
                '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect',
                'MinFileBased',
                'LogCollect'),
               (23,
                '/TestWorkload/Reco/RecoMergewrite_RECO/merged-logArchive',
                '/TestWorkload/Reco/RecoMergewrite_RECO/Recowrite_RECOMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (25,
                '/TestWorkload/Reco/unmerged-logArchive',
                '/TestWorkload/Reco/LogCollect',
                'MinFileBased',
                'LogCollect'),
               (2,
                '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO',
                '/TestWorkload/Reco/AlcaSkim',
                'ParentlessMergeBySize',
                'Processing'),
               (13,
                '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO',
                '/TestWorkload/Reco/RecoCleanupUnmergedwrite_ALCARECO',
                'SiblingProcessingBased',
                'Cleanup'),
               (16,
                '/TestWorkload/Reco/unmerged-write_AODAOD',
                '/TestWorkload/Reco/RecoCleanupUnmergedwrite_AOD',
                'SiblingProcessingBased',
                'Cleanup'),
               (14,
                '/TestWorkload/Reco/unmerged-write_AODAOD',
                '/TestWorkload/Reco/RecoMergewrite_AOD',
                'ParentlessMergeBySize',
                'Merge'),
               (21,
                '/TestWorkload/Reco/unmerged-write_DQMDQM',
                '/TestWorkload/Reco/RecoCleanupUnmergedwrite_DQM',
                'SiblingProcessingBased',
                'Cleanup'),
               (17,
                '/TestWorkload/Reco/unmerged-write_DQMDQM',
                '/TestWorkload/Reco/RecoMergewrite_DQM',
                'ParentlessMergeBySize',
                'Merge'),
               (24,
                '/TestWorkload/Reco/unmerged-write_RECORECO',
                '/TestWorkload/Reco/RecoCleanupUnmergedwrite_RECO',
                'SiblingProcessingBased',
                'Cleanup'),
               (22,
                '/TestWorkload/Reco/unmerged-write_RECORECO',
                '/TestWorkload/Reco/RecoMergewrite_RECO',
                'ParentlessMergeBySize',
                'Merge'),
               (1,
                'TestWorkload-Reco-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/Reco',
                'EventAwareLumiBased',
                'Processing')]

    # Build the PromptReco workload with harvesting enabled.
    testArguments = PromptRecoWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = "promptreco_t"
    testArguments["EnableHarvesting"] = True

    factory = PromptRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

    # Inject into WMBS; the input dataset name doubles as the block name.
    testWMBSHelper = WMBSHelper(testWorkload, "Reco", blockName=testArguments['InputDataset'],
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload",
                                                 returnTuple=True)
    self.assertItemsEqual(subscriptions, subMaps)
def testPromptRecoWithSkims(self):
    """
    _testT1PromptRecoWithSkim_

    Create a T1 Prompt Reconstruction workflow with PromptSkims
    and verify it installs into WMBS correctly: output filesets for
    the Reco, AlcaSkim, merge and harvest workflows, plus the
    subscription type/split-algorithm of every downstream task.

    Fix: the merge-LogCollect loops previously asserted on the stale
    ``logCollectSub`` variable (left over from the Reco/AlcaSkim
    LogCollect checks) instead of the freshly loaded
    ``logCollectSubscription``, so those subscriptions were never
    actually verified.
    """
    testArguments = PromptRecoWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["EnableHarvesting"] = True
    factory = PromptRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Inject the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Reco workflow: one output per write tier plus logArchive.
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = {"write_RECO": "RECO", "write_ALCARECO": "ALCARECO",
                        "write_AOD": "AOD", "write_DQM": "DQM"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # ALCARECO is consumed by the AlcaSkim task, not a plain merge task.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim workflow: one output per configured skim plus logArchive.
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        fset = goldenOutputMod + "ALCARECO"
        mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # DQM harvesting workflow only produces a logArchive.
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Reco merge workflows: Merged<tier> output plus logArchive.
    goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # AlcaSkim merge workflows: MergedALCARECO output plus logArchive.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level subscription: Processing with EventAwareLumiBased splitting.
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # AlcaSkim subscription feeds off the unmerged ALCARECO output.
    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # DQM harvest subscription feeds off the merged DQM output.
    mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # Merge subscriptions for the Reco data tiers.
    unmergedOutputs = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
    for unmergedOutput, tier in unmergedOutputs.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions for the AlcaSkim streams.
    unmergedOutputs = []
    for alcaProd in testArguments["AlcaSkims"]:
        unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions for the unmerged Reco outputs.
    goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # Cleanup subscriptions for the unmerged AlcaSkim outputs.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # LogCollect subscription for the Reco task.
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscription for the AlcaSkim task.
    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the Reco merge tasks.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                                                   goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                              workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Fixed: assert on the subscription loaded in this iteration,
        # not the stale logCollectSub from the checks above.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscriptions for the AlcaSkim merge tasks.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                                                  goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                              workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Fixed: assert on the per-iteration subscription (was logCollectSub).
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect subscription for the DQM harvest task.
    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def setupPromptRecoWorkflow(self):
    """
    _setupPromptRecoWorkflow_

    Populate WMBS with a real PromptReco workflow,
    every subscription must be unfinished at first
    """
    workflowName = 'PromptReco_Run195360_Cosmics'

    # Build the workload on disk and inject it into WMBS.
    recoArgs = PromptRecoWorkloadFactory.getTestArguments()
    recoArgs["EnableHarvesting"] = True
    recoArgs["CouchURL"] = os.environ["COUCHURL"]
    workload = PromptRecoWorkloadFactory().factoryWorkloadConstruction(workflowName, recoArgs)

    helper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    # State progression the workflow is expected to walk through; each
    # state maps to the subscriptions that gate entering it.
    self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']
    self.stateMap = {state: [] for state in self.orderedStates}

    # Register the request in WMStats.
    self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
    self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

    topLevelTask = '/%s/Reco' % workflowName
    alcaSkimTask = '%s/AlcaSkim' % topLevelTask

    # The top-level Reco subscription gates the AlcaSkim state.
    self.stateMap['AlcaSkim'].append(helper.topLevelSubscription)

    # The AlcaSkim subscription gates the Merge state.
    alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
    alcaSkimWorkflow.load()
    alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
    alcarecoFileset.load()
    alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
    alcaSkimSub.load()
    self.stateMap['Merge'].append(alcaSkimSub)

    # Every merge subscription gates the Harvesting state.
    mergeTaskTemplates = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                          '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                          '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                          '%s/RecoMergewrite_AOD',
                          '%s/RecoMergewrite_DQM',
                          '%s/RecoMergewrite_RECO']
    for template in mergeTaskTemplates:
        mergeTask = template % topLevelTask
        mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
        mergeWorkflow.load()
        leaf = mergeTask.split('/')[-1]
        if 'AlcaSkim' in mergeTask:
            # Leaf is 'AlcaSkimMerge<stream>'; drop the 13-char prefix.
            stream = leaf[13:]
            unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
        else:
            # Leaf is 'RecoMergewrite_<TIER>'; the tier follows the underscore.
            dataTier = leaf.split('_')[-1]
            unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
        unmergedFileset.load()
        mergeSub = Subscription(unmergedFileset, mergeWorkflow)
        mergeSub.load()
        self.stateMap['Harvesting'].append(mergeSub)

    # The DQM harvesting subscription gates the Processing Done state.
    harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask
    harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
    harvestingWorkflow.load()
    harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
    harvestingFileset.load()
    harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
    harvestingSub.load()
    self.stateMap['Processing Done'].append(harvestingSub)
    return