def getMergeACDCSpec(self, splittingAlgo='ParentlessMergeBySize', splittingArgs=None):
    """
    _getMergeACDCSpec_

    Build an ACDC spec for the merge task of a ReReco workload: construct
    a fresh ReReco workload, truncate it at the RECO-output merge task and
    apply the requested job splitting parameters.
    """
    # None sentinel -> fresh empty dict for each call
    splittingArgs = {} if splittingArgs is None else splittingArgs

    rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    rerecoArgs["Requestor"] = self.user
    rerecoArgs["Group"] = self.group

    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction(
        self.workflowName, rerecoArgs)

    mergeTaskPath = '/%s/DataProcessing/DataProcessingMergeRECOoutput' % self.workflowName
    workload.truncate('ACDC_%s' % self.workflowName, mergeTaskPath,
                      self.couchUrl, self.acdcDBName)
    workload.setJobSplittingParameters(
        '/ACDC_%s/DataProcessingMergeRECOoutput' % self.workflowName,
        splittingAlgo, splittingArgs)
    return workload
def testMemCoresSettings(self):
    """
    _testMemCoresSettings_

    Make sure the multicore and memory settings are properly propagated to
    all tasks and steps.
    """
    # Build a ReReco workload with a skim and harvesting enabled, so that
    # processing, skim and harvest task/step types are all covered below.
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    # NOTE(review): the reco config is reused as the DQM harvest config here;
    # the sibling variant of this test injects a dedicated DQM config — confirm intended.
    dataProcArguments["DQMConfigCacheID"] = recoConfig
    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # test default values
    # map of task path -> step names to inspect in that task
    taskPaths = {'/TestWorkload/DataProcessing': ['cmsRun1', 'stageOut1', 'logArch1'],
                 '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim':
                     ['cmsRun1', 'stageOut1', 'logArch1'],
                 '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
                     ['cmsRun1', 'upload1', 'logArch1']}
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        for step in taskPaths[task]:
            stepHelper = taskObj.getStepHelper(step)
            # default: every step is single-core
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
        # FIXME: not sure whether we should set performance parameters to Harvest jobs?!?
        if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
            continue
        # then test Memory requirements
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], 2300.0)

    # now test case where args are provided
    dataProcArguments["Multicore"] = 6
    dataProcArguments["Memory"] = 4600.0
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        for step in taskPaths[task]:
            stepHelper = taskObj.getStepHelper(step)
            if not task.endswith('DQMHarvestMerged') and step == 'cmsRun1':
                # only non-harvest cmsRun steps pick up the Multicore setting
                self.assertEqual(stepHelper.getNumberOfCores(), dataProcArguments["Multicore"])
            elif step in ('stageOut1', 'upload1', 'logArch1'):
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
            else:
                self.assertEqual(stepHelper.getNumberOfCores(), 1, "%s should be single-core" % task)
        # FIXME: not sure whether we should set performance parameters to Harvest jobs?!?
        if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
            continue
        # then test Memory requirements
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], dataProcArguments["Memory"])

    return
def getProcessingACDCSpec(self, splittingAlgo='LumiBased', splittingArgs=None,
                          setLocationFlag=False):
    """
    _getProcessingACDCSpec_

    Build an ACDC spec for the processing task of a ReReco workload:
    construct a fresh workload, truncate it at the DataProcessing task,
    apply the job splitting and site whitelist, and optionally mark the
    site lists as trusted.
    """
    # None sentinel -> fresh default dict for each call
    splittingArgs = {'lumis_per_job': 8} if splittingArgs is None else splittingArgs

    rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    rerecoArgs["Requestor"] = self.user
    rerecoArgs["Group"] = self.group

    workload = ReRecoWorkloadFactory().factoryWorkloadConstruction(
        self.workflowName, rerecoArgs)
    workload.truncate('ACDC_%s' % self.workflowName,
                      '/%s/DataProcessing' % self.workflowName,
                      self.couchUrl, self.acdcDBName)
    workload.setJobSplittingParameters('/ACDC_%s/DataProcessing' % self.workflowName,
                                       splittingAlgo, splittingArgs)
    workload.setSiteWhitelist(self.siteWhitelist)
    if setLocationFlag:
        workload.setTrustLocationFlag(setLocationFlag)
    return workload
def getSchema(groupName='PeopleLikeMe', userName='******'):
    """Return a minimal 'TestReReco' request schema for the given requestor and group."""
    schema = ReRecoWorkloadFactory.getTestArguments()
    schema.update({'RequestName': 'TestReReco',
                   'RequestType': 'ReReco',
                   'Requestor': '%s' % userName,
                   'Group': '%s' % groupName})
    return schema
def getSchema(groupName="PeopleLikeMe", userName="******"):
    """Return a 'TestReReco' request schema, patching placeholder release values."""
    schema = ReRecoWorkloadFactory.getTestArguments()
    # ScramArch and CMSSWVersion go through regexp-based request validation,
    # so a missing or "fake" placeholder is replaced with a realistic value.
    if schema.get("ScramArch", "fake") == "fake":
        schema["ScramArch"] = "slc5_amd64_gcc478"
    if schema.get("CMSSWVersion", "fake") == "fake":
        schema["CMSSWVersion"] = "CMSSW_7_0_0"
    schema.update({"RequestName": "TestReReco",
                   "RequestType": "ReReco",
                   "Requestor": "%s" % userName,
                   "Group": "%s" % groupName})
    return schema
def getSchema(groupName='PeopleLikeMe', userName='******'):
    """Return a 'TestReReco' request schema, patching placeholder release values."""
    schema = ReRecoWorkloadFactory.getTestArguments()
    # Placeholder values would fail the regexp validation applied to these
    # fields, so replace anything missing or set to 'fake'.
    for field, goodValue in (('ScramArch', 'slc5_amd64_gcc478'),
                             ('CMSSWVersion', 'CMSSW_7_0_0')):
        if field not in schema or schema[field] == 'fake':
            schema[field] = goodValue
    schema['RequestName'] = 'TestReReco'
    schema['RequestType'] = 'ReReco'
    schema['Requestor'] = '%s' % userName
    schema['Group'] = '%s' % groupName
    return schema
def createReRecoSpec(self, specName, returnType="spec", splitter=None,
                     inputDataset=None, dbsUrl=None, **additionalArgs):
    """
    Build a ReReco workload spec named `specName`.

    `additionalArgs` override the factory test arguments; when given,
    `inputDataset` and `dbsUrl` are patched into the top-level task's input
    dataset. The result is converted through _selectReturnType.
    """
    # update args, then reset them
    args = ReRecoWorkloadFactory.getTestArguments()
    args.update(additionalArgs)
    args["ConfigCacheID"] = createConfig(args["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    spec = factory.factoryWorkloadConstruction(specName, args)
    # 'is not None' instead of '!= None' (identity check for the sentinel);
    # next(iterator) instead of iterator.next(), which is Python-2-only and
    # raises AttributeError on Python 3 generators.
    if inputDataset is not None:
        next(spec.taskIterator()).data.input.dataset.primary = inputDataset
    if dbsUrl is not None:
        print(dbsUrl)
        next(spec.taskIterator()).data.input.dataset.dbsurl = dbsUrl
    return self._selectReturnType(spec, returnType, splitter)
def createReRecoSpec(self, specName, returnType="spec", splitter=None,
                     inputDataset=None, dbsUrl=None, **additionalArgs):
    """
    Build a ReReco workload spec named `specName`.

    `additionalArgs` override the factory test arguments; when given,
    `inputDataset` and `dbsUrl` are patched into the top-level task's input
    dataset. The result is converted through _selectReturnType.
    """
    # update args, then reset them
    args = ReRecoWorkloadFactory.getTestArguments()
    args.update(additionalArgs)
    args["ConfigCacheID"] = createConfig(args["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    spec = factory.factoryWorkloadConstruction(specName, args)
    # Fixes: 'print dbsUrl' was a Python 2 print *statement* (SyntaxError on
    # Python 3); '.next()' is Python-2-only; '!= None' -> 'is not None'.
    if inputDataset is not None:
        next(spec.taskIterator()).data.input.dataset.primary = inputDataset
    if dbsUrl is not None:
        print(dbsUrl)
        next(spec.taskIterator()).data.input.dataset.dbsurl = dbsUrl
    return self._selectReturnType(spec, returnType, splitter)
def getMergeACDCSpec(self, splittingAlgo='ParentlessMergeBySize', splittingArgs=None):
    """
    _getMergeACDCSpec_

    Get a ACDC spec for the merge task of a ReReco workload

    :param splittingAlgo: job splitting algorithm for the truncated merge task
    :param splittingArgs: dict of splitting parameters (defaults to empty)
    """
    # Fix: the default was a mutable dict ({}), which is evaluated once and
    # shared between calls — state could leak across tests. Use a None
    # sentinel and build a fresh dict per call instead.
    if splittingArgs is None:
        splittingArgs = {}
    factory = ReRecoWorkloadFactory()
    rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(self.workflowName, rerecoArgs)
    Tier1ReRecoWorkload.truncate('ACDC_%s' % self.workflowName,
                                 '/%s/DataProcessing/DataProcessingMergeRECOoutput' % self.workflowName,
                                 self.couchUrl, self.acdcDBName)
    Tier1ReRecoWorkload.setJobSplittingParameters(
        '/ACDC_%s/DataProcessingMergeRECOoutput' % self.workflowName,
        splittingAlgo, splittingArgs)
    return Tier1ReRecoWorkload
def createReRecoSpec(self, specName, returnType="spec", splitter=None,
                     assignKwargs=None, **additionalArgs):
    """
    Build a ReReco workload spec named `specName`.

    `additionalArgs` override the factory test arguments; when `assignKwargs`
    is a non-empty dict, the assignment arguments are updated with it and
    applied to the spec. The result is converted through _selectReturnType.
    """
    # Fix: assignKwargs previously defaulted to a mutable {} shared between
    # calls; use a None sentinel instead (the truthiness test below treats
    # None and {} identically, so callers are unaffected).
    # update args, then reset them
    args = ReRecoWorkloadFactory.getTestArguments()
    args.update(additionalArgs)
    args["ConfigCacheID"] = createConfig(args["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    spec = factory.factoryWorkloadConstruction(specName, args)

    if assignKwargs:
        args = ReRecoWorkloadFactory.getAssignTestArguments()
        args.update(assignKwargs)
        spec.updateArguments(args)

    return self._selectReturnType(spec, returnType, splitter)
def getProcessingACDCSpec(self, splittingAlgo='LumiBased', splittingArgs=None,
                          setLocationFlag=False):
    """
    _getProcessingACDCSpec_

    Get a ACDC spec for the processing task of a ReReco workload

    :param splittingAlgo: job splitting algorithm for the truncated task
    :param splittingArgs: dict of splitting parameters
                          (defaults to {'lumis_per_job': 8})
    :param setLocationFlag: when True, trust the site lists and apply the
                            site whitelist
    """
    # Fix: the default was a mutable dict shared between calls; use a None
    # sentinel and build the default fresh on every call.
    if splittingArgs is None:
        splittingArgs = {'lumis_per_job': 8}
    factory = ReRecoWorkloadFactory()
    rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
    rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
    Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(self.workflowName, rerecoArgs)
    Tier1ReRecoWorkload.truncate('ACDC_%s' % self.workflowName,
                                 '/%s/DataProcessing' % self.workflowName,
                                 self.couchUrl, self.acdcDBName)
    Tier1ReRecoWorkload.setJobSplittingParameters('/ACDC_%s/DataProcessing' % self.workflowName,
                                                  splittingAlgo, splittingArgs)
    if setLocationFlag:
        # NOTE(review): whitelist applied only when trusting locations,
        # matching the original nesting — confirm against callers.
        Tier1ReRecoWorkload.setTrustLocationFlag()
        Tier1ReRecoWorkload.setSiteWhitelist(self.siteWhitelist)
    return Tier1ReRecoWorkload
def createReRecoSpec(self, specName, returnType="spec", splitter=None,
                     assignKwargs=None, **additionalArgs):
    """
    Build a ReReco workload spec named `specName`.

    `additionalArgs` override the factory test arguments; when `assignKwargs`
    is a non-empty dict, the assignment arguments are updated with it and
    applied to the spec. The result is converted through _selectReturnType.
    """
    # Fix: mutable default argument (assignKwargs={}) replaced with a None
    # sentinel; 'if assignKwargs:' behaves the same for None and {}.
    # update args, then reset them
    args = ReRecoWorkloadFactory.getTestArguments()
    args.update(additionalArgs)
    args["ConfigCacheID"] = createConfig(args["CouchDBName"])
    factory = ReRecoWorkloadFactory()
    spec = factory.factoryWorkloadConstruction(specName, args)

    if assignKwargs:
        args = ReRecoWorkloadFactory.getAssignTestArguments()
        args.update(assignKwargs)
        spec.updateArguments(args)

    return self._selectReturnType(spec, returnType, splitter)
def testFilesets(self):
    """
    _testFilesets_

    Test workflow tasks, filesets and subscriptions creation
    """
    # expected tasks, filesets, subscriptions, etc
    # tasks that produce output datasets
    expOutTasks = ['/TestWorkload/DataProcessing',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
                   '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
                   '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput']
    # every task registered for the workflow (incl. cleanup/logcollect)
    expWfTasks = ['/TestWorkload/DataProcessing',
                  '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput',
                  '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged',
                  '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
                  '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect',
                  '/TestWorkload/DataProcessing/LogCollect']
    # expected fileset names (fileset names carry the data tier suffix here)
    expFsets = ['TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/DataProcessing/unmerged-RECOoutputRECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-MergedRECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-MergedRAW-RECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-MergedUSER',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimARAW-RECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimBUSER',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-MergedDQM',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive',
                '/TestWorkload/DataProcessing/unmerged-DQMoutputDQM',
                '/TestWorkload/DataProcessing/unmerged-logArchive']
    # (subscription id, input fileset, task, splitting algo, subscription type)
    subMaps = [(15,
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect',
                'MinFileBased',
                'LogCollect'),
               (16,
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (14,
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-MergedDQM',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged',
                'Harvest',
                'Harvesting'),
               (11,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (3,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-MergedRECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
                'FileBased',
                'Skim'),
               (5,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (8,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect',
                'MinFileBased',
                'LogCollect'),
               (10,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect',
                'MinFileBased',
                'LogCollect'),
               (6,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimARAW-RECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA',
                'SiblingProcessingBased',
                'Cleanup'),
               (4,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimARAW-RECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
                'ParentlessMergeBySize',
                'Merge'),
               (9,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimBUSER',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB',
                'SiblingProcessingBased',
                'Cleanup'),
               (7,
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimBUSER',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
                'ParentlessMergeBySize',
                'Merge'),
               (17,
                '/TestWorkload/DataProcessing/unmerged-DQMoutputDQM',
                '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput',
                'SiblingProcessingBased',
                'Cleanup'),
               (13,
                '/TestWorkload/DataProcessing/unmerged-DQMoutputDQM',
                '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput',
                'ParentlessMergeBySize',
                'Merge'),
               (18,
                '/TestWorkload/DataProcessing/unmerged-logArchive',
                '/TestWorkload/DataProcessing/LogCollect',
                'MinFileBased',
                'LogCollect'),
               (12,
                '/TestWorkload/DataProcessing/unmerged-RECOoutputRECO',
                '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput',
                'SiblingProcessingBased',
                'Cleanup'),
               (2,
                '/TestWorkload/DataProcessing/unmerged-RECOoutputRECO',
                '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
                'ParentlessMergeBySize',
                'Merge'),
               (1,
                'TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW',
                '/TestWorkload/DataProcessing',
                'EventAwareLumiBased',
                'Processing')]

    # build a ReReco workload with one skim and DQM harvesting enabled
    testArguments = ReRecoWorkloadFactory.getTestArguments()
    testArguments["ConfigCacheID"] = self.injectReRecoConfig()
    testArguments.update({"SkimName1": "SomeSkim",
                          "SkimInput1": "RECOoutput",
                          "Skim1ConfigCacheID": self.injectSkimConfig()})
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = "rereco_t"
    testArguments["EnableHarvesting"] = True
    testArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

    # inject the workload into WMBS and create its subscriptions
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", blockName=testArguments['InputDataset'],
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    self.assertItemsEqual(subscriptions, subMaps)
def testMemCoresSettings(self):
    """
    _testMemCoresSettings_

    Make sure the multicore and memory settings are properly propagated to
    all tasks and steps.
    """
    # Build a ReReco workload with a skim and harvesting enabled so that
    # processing, skim and harvest task/step types are all covered below.
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({
        "SkimName1": "SomeSkim",
        "SkimInput1": "RECOoutput",
        "Skim1ConfigCacheID": skimConfig
    })
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()
    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction(
        "TestWorkload", dataProcArguments)

    # test default values
    # map of task path -> step names to inspect in that task
    taskPaths = {
        '/TestWorkload/DataProcessing': ['cmsRun1', 'stageOut1', 'logArch1'],
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim':
            ['cmsRun1', 'stageOut1', 'logArch1'],
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
            ['cmsRun1', 'upload1', 'logArch1']
    }
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        for step in taskPaths[task]:
            stepHelper = taskObj.getStepHelper(step)
            # defaults: single core, no extra event streams
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
            self.assertEqual(stepHelper.getNumberOfStreams(), 0)
        # FIXME: not sure whether we should set performance parameters to Harvest jobs?!?
        if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
            continue
        # then test Memory requirements
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], 2300.0)

    # now test case where args are provided
    dataProcArguments["Multicore"] = 6
    dataProcArguments["Memory"] = 4600.0
    dataProcArguments["EventStreams"] = 3
    testWorkload = factory.factoryWorkloadConstruction(
        "TestWorkload", dataProcArguments)
    for task in taskPaths:
        taskObj = testWorkload.getTaskByPath(task)
        for step in taskPaths[task]:
            stepHelper = taskObj.getStepHelper(step)
            if not task.endswith('DQMHarvestMerged') and step == 'cmsRun1':
                # only non-harvest cmsRun steps pick up Multicore/EventStreams
                self.assertEqual(stepHelper.getNumberOfCores(), dataProcArguments["Multicore"])
                self.assertEqual(stepHelper.getNumberOfStreams(), dataProcArguments["EventStreams"])
            elif step in ('stageOut1', 'upload1', 'logArch1'):
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            else:
                self.assertEqual(stepHelper.getNumberOfCores(), 1, "%s should be single-core" % task)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
        # FIXME: not sure whether we should set performance parameters to Harvest jobs?!?
        if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
            continue
        # then test Memory requirements
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], dataProcArguments["Memory"])

    return
def testFilesets(self):
    """
    Test workflow tasks, filesets and subscriptions creation
    """
    # expected tasks, filesets, subscriptions, etc
    # tasks that produce output datasets
    expOutTasks = [
        '/TestWorkload/DataProcessing',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput'
    ]
    # every task registered for the workflow (incl. cleanup/logcollect)
    expWfTasks = [
        '/TestWorkload/DataProcessing',
        '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput',
        '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect',
        '/TestWorkload/DataProcessing/LogCollect'
    ]
    # expected fileset names (no data-tier suffix in this naming scheme)
    expFsets = [
        'TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW',
        '/TestWorkload/DataProcessing/unmerged-RECOoutput',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-Merged',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-Merged',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive',
        '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive',
        '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive',
        '/TestWorkload/DataProcessing/unmerged-DQMoutput',
        '/TestWorkload/DataProcessing/unmerged-logArchive'
    ]
    # (subscription id, input fileset, task, splitting algo, subscription type)
    subMaps = [
        (15,
         '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive',
         '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect',
         'MinFileBased',
         'LogCollect'),
        (16,
         '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive',
         '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect',
         'MinFileBased',
         'LogCollect'),
        (14,
         '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged',
         '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged',
         'Harvest',
         'Harvesting'),
        (11,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect',
         'MinFileBased',
         'LogCollect'),
        (3,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim',
         'FileBased',
         'Skim'),
        (5,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect',
         'MinFileBased',
         'LogCollect'),
        (8,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect',
         'MinFileBased',
         'LogCollect'),
        (10,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect',
         'MinFileBased',
         'LogCollect'),
        (6,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA',
         'SiblingProcessingBased',
         'Cleanup'),
        (4,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA',
         'ParentlessMergeBySize',
         'Merge'),
        (9,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB',
         'SiblingProcessingBased',
         'Cleanup'),
        (7,
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB',
         'ParentlessMergeBySize',
         'Merge'),
        (17,
         '/TestWorkload/DataProcessing/unmerged-DQMoutput',
         '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput',
         'SiblingProcessingBased',
         'Cleanup'),
        (13,
         '/TestWorkload/DataProcessing/unmerged-DQMoutput',
         '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput',
         'ParentlessMergeBySize',
         'Merge'),
        (18,
         '/TestWorkload/DataProcessing/unmerged-logArchive',
         '/TestWorkload/DataProcessing/LogCollect',
         'MinFileBased',
         'LogCollect'),
        (12,
         '/TestWorkload/DataProcessing/unmerged-RECOoutput',
         '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput',
         'SiblingProcessingBased',
         'Cleanup'),
        (2,
         '/TestWorkload/DataProcessing/unmerged-RECOoutput',
         '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput',
         'ParentlessMergeBySize',
         'Merge'),
        (1,
         'TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW',
         '/TestWorkload/DataProcessing',
         'EventAwareLumiBased',
         'Processing')
    ]

    # build a ReReco workload with one skim and DQM harvesting enabled
    testArguments = ReRecoWorkloadFactory.getTestArguments()
    testArguments["ConfigCacheID"] = self.injectReRecoConfig()
    testArguments.update({
        "SkimName1": "SomeSkim",
        "SkimInput1": "RECOoutput",
        "Skim1ConfigCacheID": self.injectSkimConfig()
    })
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = "rereco_t"
    testArguments["EnableHarvesting"] = True
    testArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction(
        "TestWorkload", testArguments)

    # inject the workload into WMBS and create its subscriptions
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing",
                                blockName=testArguments['InputDataset'],
                                cachepath=self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(
        testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    print("Tasks producing output:\n%s" % pformat(testWorkload.listOutputProducingTasks()))
    self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

    workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
    print("List of workflow tasks:\n%s" % pformat([item['task'] for item in workflows]))
    self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

    # returns a tuple of id, name, open and last_update
    filesets = self.listFilesets.execute()
    print("List of filesets:\n%s" % pformat([item[1] for item in filesets]))
    self.assertItemsEqual([item[1] for item in filesets], expFsets)

    subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
    print("List of subscriptions:\n%s" % pformat(subscriptions))
    self.assertItemsEqual(subscriptions, subMaps)
from WMQuality.Emulators.DataBlockGenerator.Globals import GlobalParams from WMQuality.Emulators.DBSClient.DBSReader import DBSReader as MockDBSReader from WMQuality.Emulators.SiteDBClient.SiteDB import SiteDBJSON as fakeSiteDB from WMCore.WMSpec.StdSpecs.ReReco import ReRecoWorkloadFactory from WMQuality.Emulators.WMSpecGenerator.Samples.TestMonteCarloWorkload \ import monteCarloWorkload, getMCArgs from WMQuality.Emulators.WMSpecGenerator.WMSpecGenerator import createConfig from WMQuality.Emulators import EmulatorSetup from WMQuality.TestInitCouchApp import TestInitCouchApp from WMCore.BossAir.BossAirAPI import BossAirAPI from WMCore.Configuration import loadConfigurationFile from WMCore.ResourceControl.ResourceControl import ResourceControl rerecoArgs = ReRecoWorkloadFactory.getTestArguments() mcArgs = getMCArgs() def getFirstTask(wmspec): """Return the 1st top level task""" # http://www.logilab.org/ticket/8774 # pylint: disable=E1101,E1103 return wmspec.taskIterator().next() class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly. The ReReco workflow is just a DataProcessing workflow with
    skims tacked on. This tests run on unmerged RECO output
    """
    # Build a ReReco workload whose RECO output is transient, so the skim
    # runs directly on the unmerged DataProcessing output (no merge step in
    # between).
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({
        "SkimName1": "SomeSkim",
        "SkimInput1": "RECOoutput",
        "Skim1ConfigCacheID": skimConfig
    })
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    # Transient output => RECOoutput is consumed unmerged and never archived.
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()
    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction(
        "TestWorkload", dataProcArguments)

    # With a transient RECO output the skim hangs directly off DataProcessing
    # (not off a merge task), which is reflected in the merged LFN base.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
        SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
        Merged.mergedLFNBase,
        '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    # Inject the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(
        testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Verify the skim workflow and its output filesets (SkimA/SkimB plus the
    # logArchive output).
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
            "merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
            "output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # Both log-archive mappings of the skim point at the same unmerged fileset.
    logArchOutput = skimWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")

    # Verify the per-skim merge workflows and their output filesets.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" %
            goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # The skim subscription feeds off the UNMERGED RECO fileset because the
    # RECO output module was declared transient above.
    topLevelFileset = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions over the unmerged skim outputs.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
            skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" %
            skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions over the same unmerged skim outputs.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
            skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s"
            % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions over the merged log archives of each skim merge.
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(
            name=
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive"
            % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect"
            % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
    return
def getReRecoArgs(parent=False):
    """
    Build the default ReReco spec arguments.

    :param parent: when True, flag the request to also read parent data
                   by setting ``IncludeParents`` to the string "True".
    :return: a fresh dictionary of test arguments for the ReReco spec.
    """
    specArgs = ReRecoWorkloadFactory.getTestArguments()
    if parent:
        # The spec expects the string form, not the boolean.
        specArgs["IncludeParents"] = "True"
    return specArgs
""" import unittest from WMCore.WorkQueue.Policy.Start.Dataset import Dataset from WMCore.WMSpec.StdSpecs.ReReco import ReRecoWorkloadFactory from WMCore.Services.EmulatorSwitch import EmulatorHelper from WMCore.Services.DBS.DBSReader import DBSReader from WMCore_t.WMSpec_t.samples.MultiTaskProcessingWorkload \ import workload as MultiTaskProcessingWorkload from WMCore.WorkQueue.WorkQueueExceptions import * from WMCore_t.WorkQueue_t.WorkQueue_t import getFirstTask from WMQuality.Emulators.DataBlockGenerator import Globals from WMQuality.Emulators.WMSpecGenerator.WMSpecGenerator import createConfig rerecoArgs = ReRecoWorkloadFactory.getTestArguments() parentProcArgs = ReRecoWorkloadFactory.getTestArguments() parentProcArgs.update(IncludeParents = "True") class DatasetTestCase(unittest.TestCase): splitArgs = dict(SliceType = 'NumberOfFiles', SliceSize = 5) def setUp(self): Globals.GlobalParams.resetParams() EmulatorHelper.setEmulators(phedex = True, dbs = True, siteDB = True, requestMgr = False) def tearDown(self): EmulatorHelper.resetEmulators()
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.
    """
    # Build a full ReReco workload: DataProcessing + one skim over the
    # merged RECO output + DQM harvesting over the merged DQM output.
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()
    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                       dataProcArguments)

    # The skim hangs off the RECO merge task, so the skim-merge LFN base
    # carries the skim filter and processing string.
    self.assertEqual(
        testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput. \
        tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
        Merged.mergedLFNBase,
        '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    # Inject the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Top-level processing workflow: RECO + DQM outputs + logArchive.
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing")
    procWorkflow.load()
    self.assertEqual(len(procWorkflow.outputMap), 3,
                     "Error: Wrong number of WF outputs.")

    # Output module name -> data tier; filesets are keyed by module+tier.
    goldenOutputMods = {"RECOoutput": "RECO", "DQMoutput": "DQM"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = procWorkflow.outputMap[fset][0][
            "merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s"
            % (goldenOutputMod, tier),
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Merge workflows for RECO and DQM outputs.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingMerge%s" %
            goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap[
            "Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap[
            "Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s"
            % (goldenOutputMod, tier),
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s"
            % (goldenOutputMod, tier),
            "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive"
            % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" %
            logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive"
            % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # Top-level processing subscription over the injected block fileset.
    topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions over the unmerged RECO and DQM filesets.
    unmergedReco = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedReco,
                                     workflow=recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"],
                     "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    unmergedDqm = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-DQMoutputDQM")
    unmergedDqm.loadData()
    dqmMergeWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
    dqmMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedDqm,
                                     workflow=dqmMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"],
                     "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # Cleanup subscriptions over the unmerged processing outputs.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s"
            % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscription over the processing task's log archive.
    procLogCollect = Fileset(
        name="/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect,
                                 workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription over the RECO merge task's log archive.
    procLogCollect = Fileset(
        name=
        "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive"
    )
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task=
        "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect"
    )
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect,
                                 workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription over the DQM merge task's log archive.
    # FIX: the original checked the RECOoutput merge log-collect twice
    # (copy-paste duplicate) and never verified the DQMoutput one.
    procLogCollect = Fileset(
        name=
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive"
    )
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task=
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect"
    )
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect,
                                 workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # Skim workflow hanging off the RECO merge task.
    skimWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim"
    )
    skimWorkflow.load()
    self.assertEqual(len(skimWorkflow.outputMap), 3,
                     "Error: Wrong number of WF outputs.")
    goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = skimWorkflow.outputMap[fset][0][
            "merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s"
            % (goldenOutputMod, tier),
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s"
            % fset,
            "Error: Unmerged output fileset is wrong: %s" %
            unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")

    # Per-skim merge workflows and their output filesets.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s"
            % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap[
            "Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap[
            "Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s"
            % (goldenOutputMod, tier),
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s"
            % (goldenOutputMod, tier),
            "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive"
            % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" %
            logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive"
            % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # The skim subscription feeds off the MERGED RECO fileset.
    topLevelFileset = Fileset(
        name=
        "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-MergedRECO"
    )
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions over the unmerged skim outputs.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(
            name=
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s"
            % fset)
        unmerged.loadData()
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s"
            % goldenOutputMod)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions over the unmerged skim outputs.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(
            name=
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s"
            % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmerged%s"
            % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions for the skim merge tasks.
    for skimOutput in goldenOutputMods:
        skimMergeLogCollect = Fileset(
            name=
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive"
            % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect"
            % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # DQM harvesting subscription over the merged DQM fileset.
    dqmWorkflow = Workflow(
        name="TestWorkload",
        task=
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged"
    )
    dqmWorkflow.load()
    topLevelFileset = Fileset(
        name=
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-MergedDQM"
    )
    topLevelFileset.loadData()
    dqmSubscription = Subscription(fileset=topLevelFileset,
                                   workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0][
        "merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][
        "output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")

    # LogCollect subscription for the harvest task.
    dqmHarvestLogCollect = Fileset(
        name=
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive"
    )
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task=
        "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect"
    )
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                 workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def testReReco(self): """ _testReReco_ Verify that ReReco workflows can be created and inserted into WMBS correctly. """ skimConfig = self.injectSkimConfig() recoConfig = self.injectReRecoConfig() dataProcArguments = ReRecoWorkloadFactory.getTestArguments() dataProcArguments["ProcessingString"] = "ProcString" dataProcArguments["ConfigCacheID"] = recoConfig dataProcArguments.update({"SkimName1": "SomeSkim", "SkimInput1": "RECOoutput", "Skim1ConfigCacheID": skimConfig}) dataProcArguments["CouchURL"] = os.environ["COUCHURL"] dataProcArguments["CouchDBName"] = "rereco_t" dataProcArguments["EnableHarvesting"] = True dataProcArguments["DQMConfigCacheID"] = recoConfig factory = ReRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments) self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\ tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\ Merged.mergedLFNBase, '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1') testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing") procWorkflow.load() self.assertEqual(len(procWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") goldenOutputMods = ["RECOoutput", "DQMoutput"] for goldenOutputMod in goldenOutputMods: mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) 
self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, 
"/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow) procSubscription.loadData() self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased", "Error: Wrong split algo.") unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput") unmergedReco.loadData() recoMergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput") recoMergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") unmergedDqm = Fileset(name = "/TestWorkload/DataProcessing/unmerged-DQMoutput") unmergedDqm.loadData() dqmMergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput") dqmMergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedDqm, workflow = dqmMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") for procOutput in ["RECOoutput", "DQMoutput"]: unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput) unmerged.loadData() cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput) cleanupWorkflow.load() cleanupSubscription = 
Subscription(fileset = unmerged, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/LogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") 
self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") skimWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim") skimWorkflow.load() self.assertEqual(len(skimWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") goldenOutputMods = ["SkimA", "SkimB"] for goldenOutputMod in goldenOutputMods: mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = 
mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged") topLevelFileset.loadData() skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput) unmerged.loadData() mergeWorkflow = Workflow(name = "TestWorkload", task = 
"/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput) mergeWorkflow.load() mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput) unmerged.loadData() cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput) skimMergeLogCollect.loadData() skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput)) skimMergeLogCollectWorkflow.load() logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") dqmWorkflow = Workflow(name = "TestWorkload", task = 
"/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged") dqmWorkflow.load() topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged") topLevelFileset.loadData() dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow) dqmSubscription.loadData() self.assertEqual(dqmSubscription["type"], "Harvesting", "Error: Wrong subscription type.") self.assertEqual(dqmSubscription["split_algo"], "Harvest", "Error: Wrong split algo.") logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive") dqmHarvestLogCollect.loadData() dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect") dqmHarvestLogCollectWorkflow.load() logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong 
split algo.") return
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly. The ReReco workflow is just a DataProcessing workflow with
    skims tacked on. This tests run on unmerged RECO output
    """
    # Build the workload arguments: a reco config plus one skim
    # ("SomeSkim") that consumes the RECOoutput module directly.
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    # RECOoutput is declared transient: it is not merged, so the skim runs
    # on the *unmerged* RECO files (the point of this test).
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # The ProcessingString must be propagated into the skim-merge task's
    # merged LFN base.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
                         SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                         Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    # Materialize the workload in WMBS (filesets, workflows, subscriptions).
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # NOTE: because RECOoutput is transient, the skim task hangs directly
    # off DataProcessing (no intermediate merge task in its path).
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()

    # Expected outputs: SkimA, SkimB and logArchive.
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    # Map of skim output module -> data tier; outputMap keys are the
    # concatenation of the two (e.g. "SkimARAW-RECO").
    goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive is never merged, so both map entries point at the same
    # unmerged fileset.
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify the merge workflow created for each skim output module.
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        # Merge tasks produce exactly: Merged<tier> and logArchive.
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        # A merge task's output is already merged, so both entries point
        # at the merged fileset.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscribes to the *unmerged* RECO fileset (transient output),
    # not to a merged-Merged fileset.
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
    topLevelFileset.loadData()

    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Each skim output's unmerged fileset feeds its merge subscription.
    for skimOutput, tier in goldenOutputMods.items():
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # The same unmerged filesets also feed the cleanup subscriptions that
    # remove unmerged files once they have been merged.
    for skimOutput, tier in goldenOutputMods.items():
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmerged%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # Finally, each merge task's logArchive fileset feeds a LogCollect
    # subscription. Iterating the dict here yields just the module names.
    for skimOutput in goldenOutputMods:
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (
                                                   skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return