Example #1
    def testStoreResults(self):
        """
        _testStoreResults_

        Create a StoreResults workflow and verify it installs into WMBS
        correctly.
        """
        arguments = StoreResultsWorkloadFactory.getTestArguments()
        arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

        factory = StoreResultsWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                           arguments)

        testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        testWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/StoreResults")
        testWorkflow.load()

        self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["Merged"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

        logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        return
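
The three-step install pattern above (construct the workload, create the top-level fileset, then create the subscriptions for the top-level task) recurs in most of these tests. Below is a minimal sketch of how it could be factored into a shared helper; the helper name is hypothetical and it only reuses calls that already appear in these tests (factoryWorkloadConstruction, WMBSHelper, createTopLevelFileset, _createSubscriptionsInWMBS).

# Hypothetical helper, not part of WMCore: wraps the recurring "install a workload
# into WMBS" sequence used throughout these tests. Assumes WMBSHelper is imported
# as in the tests above.
def installWorkloadIntoWMBS(factory, workloadName, taskName, blockName, cachepath, arguments):
    workload = factory.factoryWorkloadConstruction(workloadName, arguments)
    helper = WMBSHelper(workload, taskName, blockName, cachepath=cachepath)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)
    return workload, helper

# Usage, mirroring testStoreResults:
#   testWorkload, testWMBSHelper = installWorkloadIntoWMBS(
#       StoreResultsWorkloadFactory(), "TestWorkload", "StoreResults",
#       "SomeBlock", self.testDir, arguments)
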
Example #2
    def testD_Exhausted(self):
        """
        _testExhausted_

        Test that the system can exhaust jobs correctly
        """
        workloadName = 'TestWorkload'

        self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', workloadName,
                                    'WMSandbox', 'WMWorkload.pkl')

        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs, retry_count=5,
                                               workloadPath=workloadPath)

        config = self.getConfig()
        config.ErrorHandler.maxRetries = 1
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')

        testSubscription = Subscription(id=1)  # You should only have one
        testSubscription.load()
        testSubscription.loadData()

        # Do we have files to start with?
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 2)

        testErrorHandler = ErrorHandlerPoller(config)
        # set reqAuxDB to None for this test
        testErrorHandler.reqAuxDB = None
        testErrorHandler.setup(None)
        testErrorHandler.algorithm(None)

        idList = self.getJobs.execute(state='JobFailed')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state='JobCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state='Exhausted')
        self.assertEqual(len(idList), self.nJobs)

        # Did we fail the files?
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 2)
Example #3
    def testD_Exhausted(self):
        """
        _testExhausted_

        Test that the system can exhaust jobs correctly
        """
        workloadName = "TestWorkload"

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, "workloadTest", "TestWorkload", "WMSandbox", "WMWorkload.pkl")

        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs, retry_count=5, workloadPath=workloadPath)

        config = self.getConfig()
        config.ErrorHandler.maxRetries = 1
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, "created", "new")
        changer.propagate(testJobGroup.jobs, "executing", "created")
        changer.propagate(testJobGroup.jobs, "complete", "executing")
        changer.propagate(testJobGroup.jobs, "jobfailed", "complete")

        testSubscription = Subscription(id=1)  # You should only have one
        testSubscription.load()
        testSubscription.loadData()

        # Do we have files to start with?
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 2)

        testErrorHandler = ErrorHandlerPoller(config)
        testErrorHandler.setup(None)
        testErrorHandler.algorithm(None)

        idList = self.getJobs.execute(state="JobFailed")
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state="JobCooloff")
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state="Exhausted")
        self.assertEqual(len(idList), self.nJobs)

        # Did we fail the files?
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 2)
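
Both versions of testD_Exhausted rely on the same retry arithmetic: the jobs are created with retry_count=5 while the ErrorHandler is configured with maxRetries=1, so a failed job has no retries left and must land in Exhausted rather than Cooloff. A pure-Python sketch of that decision, written as an illustration of what the assertions check rather than as the ErrorHandler implementation:

def nextStateForFailedJob(retryCount, maxRetries):
    """Sketch of the exhaustion decision exercised above (illustration only)."""
    return "exhausted" if retryCount >= maxRetries else "cooloff"

assert nextStateForFailedJob(5, 1) == "exhausted"  # the jobs in testD_Exhausted: no cooloff, all exhausted
assert nextStateForFailedJob(0, 1) == "cooloff"    # a first-time failure would be retried instead
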
Example #4
    def _commonMonteCarloTest(self):
        """
        Retrieve the workload from WMBS and test all its properties.
        
        """
        prodWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Production")
        prodWorkflow.load()

        self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Production/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-Production-SomeBlock")
        topLevelFileset.loadData()

        prodSubscription = Subscription(fileset = topLevelFileset, workflow = prodWorkflow)
        prodSubscription.loadData()

        self.assertEqual(prodSubscription["type"], "Production",
                         "Error: Wrong subscription type.")
        self.assertEqual(prodSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        for outputName in ["OutputA", "OutputB"]:
            unmergedOutput = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName)
            unmergedOutput.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                            task = "/TestWorkload/Production/ProductionMerge%s" % outputName)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedOutput, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for outputName in ["OutputA", "OutputB"]:
            unmerged = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/Production/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Production/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        for outputName in ["OutputA", "OutputB"]:
            mergeLogCollect = Fileset(name = "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName)
            mergeLogCollect.loadData()
            mergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                              task = "/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (outputName, outputName))
            mergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = mergeLogCollect, workflow = mergeLogCollectWorkflow)
            logCollectSub.loadData()
            
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")
Example #5
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
        dataProcArguments["ProcessingString"] = "ProcString"
        dataProcArguments["ConfigCacheID"] = recoConfig
        dataProcArguments.update({"SkimName1": "SomeSkim",
                                  "SkimInput1": "RECOoutput",
                                  "Skim1ConfigCacheID": skimConfig})
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["EnableHarvesting"] = True
        dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

        factory = ReRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput. \
                         tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                         Merged.mergedLFNBase,
                         '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = {"RECOoutput": "RECO", "DQMoutput": "DQM"}
        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            mergedOutput = procWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        unmergedDqm = Fileset(name="/TestWorkload/DataProcessing/unmerged-DQMoutputDQM")
        unmergedDqm.loadData()
        dqmMergeWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
        dqmMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDqm, workflow=dqmMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            unmerged = Fileset(name="/TestWorkload/DataProcessing/unmerged-%s" % fset)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name="/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-MergedRECO")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset)
            unmerged.loadData()
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in goldenOutputMods:
            skimMergeLogCollect = Fileset(
                name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                   task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (
                                                       skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        dqmWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-MergedDQM")
        topLevelFileset.loadData()

        dqmSubscription = Subscription(fileset=topLevelFileset, workflow=dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        dqmHarvestLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                                task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
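
As a compact recap of what the assertions above establish (derived from this test, not from WMCore itself): the processing subscription reads the input block with EventAwareLumiBased splitting, the skim reads the merged RECO fileset with FileBased splitting, and the DQM harvesting subscription reads the merged DQM fileset with Harvest splitting.

# Summary of the subscription wiring checked in testReReco (illustration only).
expectedSubscriptions = [
    ("TestWorkload-DataProcessing-SomeBlock",                                        "Processing", "EventAwareLumiBased"),
    ("/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-MergedRECO", "Skim",       "FileBased"),
    ("/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-MergedDQM",   "Harvesting", "Harvest"),
]
for fileset, subType, splitAlgo in expectedSubscriptions:
    print("%-80s %-12s %s" % (fileset, subType, splitAlgo))
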
Example #6
    def _checkTask(self, task, taskConf, centralConf):
        """
        _checkTask_

        Verify the correctness of the task

        """
        if "InputTask" in taskConf:
            inpTaskPath = task.getPathName()
            inpTaskPath = inpTaskPath.replace(task.name(), "")
            inpTaskPath += "cmsRun1"
            self.assertEqual(task.data.input.inputStep, inpTaskPath, "Input step is wrong in the spec")
            self.assertTrue(taskConf["InputTask"] in inpTaskPath, "Input task is not in the path name for child task")

        if "MCPileup" in taskConf or "DataPileup" in taskConf:
            mcDataset = taskConf.get('MCPileup', None)
            dataDataset = taskConf.get('DataPileup', None)
            if mcDataset:
                self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset, [mcDataset])
            if dataDataset:
                self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset, [dataDataset])

        workflow = Workflow(name = self.workload.name(),
                            task = task.getPathName())
        workflow.load()

        outputMods = outputModuleList(task)
        self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                         "Error: Wrong number of WF outputs")

        for outputModule in outputMods:
            filesets = workflow.outputMap[outputModule][0]
            merged = filesets['merged_output_fileset']
            unmerged = filesets['output_fileset']

            merged.loadData()
            unmerged.loadData()

            mergedset = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-Merged"
            if outputModule == "logArchive" or not taskConf.get("KeepOutput", True) \
                or outputModule in taskConf.get("TransientOutputModules", []) or outputModule in centralConf.get("IgnoredOutputModules", []):
                mergedset = task.getPathName() + "/unmerged-" + outputModule
            unmergedset = task.getPathName() + "/unmerged-" + outputModule

            self.assertEqual(mergedset, merged.name, "Merged fileset name is wrong")
            self.assertEqual(unmergedset, unmerged.name, "Unmerged fileset name is wrong")

            if outputModule != "logArchive" and taskConf.get("KeepOutput", True) \
                and outputModule not in taskConf.get("TransientOutputModules", []) \
                and outputModule not in centralConf.get("IgnoredOutputModules", []):
                mergeTask = task.getPathName() + "/" + task.name() + "Merge" + outputModule

                mergeWorkflow = Workflow(name = self.workload.name(),
                                         task = mergeTask)
                mergeWorkflow.load()
                self.assertTrue("Merged" in mergeWorkflow.outputMap, "Merge workflow does not contain a Merged output key")
                mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
                mergedFileset = mergedOutputMod['merged_output_fileset']
                unmergedFileset = mergedOutputMod['output_fileset']
                mergedFileset.loadData()
                unmergedFileset.loadData()
                self.assertEqual(mergedFileset.name, mergedset, "Merged fileset name in merge task is wrong")
                self.assertEqual(unmergedFileset.name, mergedset, "Unmerged fileset name in merge task is wrong")

                mrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['merged_output_fileset']
                umrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['output_fileset']
                mrgLogArch.loadData()
                umrgLogArch.loadData()

                archName = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-logArchive"

                self.assertEqual(mrgLogArch.name, archName, "LogArchive merged fileset name is wrong in merge task")
                self.assertEqual(umrgLogArch.name, archName, "LogArchive unmerged fileset name is wrong in merge task")

            if outputModule != "logArchive":
                taskOutputMods = task.getOutputModulesForStep(stepName = "cmsRun1")
                currentModule = getattr(taskOutputMods, outputModule)
                if "PrimaryDataset" in taskConf:
                    self.assertEqual(currentModule.primaryDataset, taskConf["PrimaryDataset"], "Wrong primary dataset")
                processedDatasetParts = ["AcquisitionEra", "ProcessingString", "ProcessingVersion"]
                allParts = True
                for part in processedDatasetParts:
                    if part in taskConf:
                        self.assertTrue(str(taskConf[part]) in currentModule.processedDataset,
                                        "Wrong processed dataset for module")
                    else:
                        allParts = False
                if allParts:
                    self.assertEqual(currentModule.processedDataset,
                                     "%s-%s-v%s" % (taskConf["AcquisitionEra"], taskConf["ProcessingString"],
                                                    taskConf["ProcessingVersion"]),
                                     "Wrong processed dataset for module")

        # Test subscriptions
        if "InputTask" not in taskConf:
            inputFileset = "%s-%s-SomeBlock" % (self.workload.name(), task.name())
        elif "Merge" in task.getPathName().split("/")[-2]:
            inpTaskPath = task.getPathName().replace(task.name(), "")
            inputFileset = inpTaskPath + "merged-Merged"
        else:
            inpTaskPath = task.getPathName().replace(task.name(), "")
            inputFileset = inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"]
        taskFileset = Fileset(name = inputFileset)
        taskFileset.loadData()

        taskSubscription = Subscription(fileset = taskFileset, workflow = workflow)
        taskSubscription.loadData()

        if "InputTask" not in taskConf and "InputDataset" not in taskConf:
            # Production type
            self.assertEqual(taskSubscription["type"], "Production",
                             "Error: Wrong subscription type for processing task")
            self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgorithm"],
                             "Error: Wrong split algo for generation task")
        else:
            # Processing type
            self.assertEqual(taskSubscription["type"], "Processing", "Wrong subscription type for task")
            if taskSubscription["split_algo"] != "WMBSMergeBySize":
                self.assertEqual(taskSubscription["split_algo"], taskConf['SplittingAlgorithm'], "Splitting algo mismatch")
            else:
                self.assertEqual(taskFileset.name, inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"],
                                 "Subscription uses WMBSMergeBySize on a merge fileset")

        return
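
The final consistency check in _checkTask builds the processed dataset name from the task configuration as "<AcquisitionEra>-<ProcessingString>-v<ProcessingVersion>". A tiny worked example with invented configuration values:

# Illustration of the processed-dataset naming convention used in the check above.
# The configuration values here are made up for the example.
taskConf = {"AcquisitionEra": "FakeEra", "ProcessingString": "ProcString", "ProcessingVersion": 1}
processedDataset = "%s-%s-v%s" % (taskConf["AcquisitionEra"],
                                  taskConf["ProcessingString"],
                                  taskConf["ProcessingVersion"])
print(processedDataset)  # FakeEra-ProcString-v1
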
Example #7
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly.
        """
        arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
        arguments["ConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

        factory = MonteCarloFromGENWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

        outputDatasets = testWorkload.listOutputDatasets()
        self.assertEqual(len(outputDatasets), 2)
        self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in outputDatasets)
        self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in outputDatasets)

        productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN')
        splitting = productionTask.jobSplittingParameters()
        self.assertFalse(splitting["deterministicPileup"])

        testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'production')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Production",
                         "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #8
    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                                   ceName = 'site1', plugin = "TestPlugin")
        resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                                   ceName = 'site2', plugin = "TestPlugin")        

        testWorkload = self.createTestWMSpec()
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
        testWMBSHelper.createSubscription()

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name,
                                                         "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name,
                                                            "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")        

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name,
                                                        "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

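        # The processing subscription should white-list site1 and black-list every other site.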
        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")                

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")        

        skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
Example #9
    def _stepTwoTaskTest(self):
        """
        _stepTwoTaskTest_
        
        """
        alcaRecoTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo",
        )
        alcaRecoTask.load()
        self.assertEqual(len(alcaRecoTask.outputMap.keys()), 3, "Error: Wrong number of WF outputs.")

        # output modules
        goldenOutputMods = ["OutputC", "OutputD"]

        for o in goldenOutputMods:
            mergedOutput = alcaRecoTask.outputMap[o][0]["merged_output_fileset"]
            unmergedOutput = alcaRecoTask.outputMap[o][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged"
                % o,
                ("Error: Merged output fileset is wrong: %s" % mergedOutput.name),
            )
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s" % o,
                ("Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name),
            )

        logArchOutput = alcaRecoTask.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaRecoTask.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-logArchive",
            ("Error: LogArchive output fileset is wrong: %s" % logArchOutput.name),
        )
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-logArchive",
            ("Error: LogArchive output fileset is wrong: %s" % unmergedLogArchOutput.name),
        )

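        # Each output module should have its own StepTwoMerge<module> task with
        # Merged and logArchive output filesets.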
        for o in goldenOutputMods:
            mergeTask = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s"
                % o,
            )
            mergeTask.load()
            self.assertEqual(len(mergeTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")
            mergedMergeOutput = mergeTask.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeTask.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()
            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged"
                % o,
                ("Error: Merged output fileset is wrong: %s" % mergedMergeOutput.name),
            )
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged"
                % o,
                ("Error: Unmerged output fileset is wrong: %s" % unmergedMergeOutput.name),
            )
            logArchOutput = mergeTask.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeTask.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()
            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive"
                % o,
                ("Error: LogArchive output fileset is wrong: %s" % logArchOutput.name),
            )
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive"
                % o,
                ("Error: LogArchive output fileset is wrong: %s" % unmergedLogArchOutput.name),
            )

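        # The unmerged filesets should feed the merge tasks via ParentlessMergeBySize subscriptions.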
        for o in goldenOutputMods:
            unmerged = Fileset(
                name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s"
                % o
            )
            unmerged.loadData()
            mergeTask = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s"
                % o,
            )
            mergeTask.load()
            mergeSubscription = Subscription(fileset=unmerged, workflow=mergeTask)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.")

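        # The same unmerged filesets should also feed SiblingProcessingBased cleanup subscriptions.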
        for o in goldenOutputMods:
            unmerged = Fileset(
                name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s"
                % o
            )
            unmerged.loadData()
            cleanupTask = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoCleanupUnmerged%s"
                % o,
            )
            cleanupTask.load()
            cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupTask)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.")

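        # Each merge task's merged-logArchive fileset should feed an EndOfRun LogCollect subscription.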
        for o in goldenOutputMods:
            alcaRecoMergeLogCollect = Fileset(
                name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive"
                % o
            )
            alcaRecoMergeLogCollect.loadData()
            alcaRecoMergeLogCollectTask = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/StepTwo%sMergeLogCollect"
                % (o, o),
            )
            alcaRecoMergeLogCollectTask.load()
            logCollectSub = Subscription(fileset=alcaRecoMergeLogCollect, workflow=alcaRecoMergeLogCollectTask)
            logCollectSub.loadData()
            self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")
Example #10
    def testPromptReco(self):
        """
        _testPromptReco_

        Create a Prompt Reconstruction workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = getTestArguments()

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name = "TestWorkload",
                                    task = "/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                        "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                              "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

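        # The DQM harvesting task runs over the merged write_DQM output.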
        dqmWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged")
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset = mergedDQMFileset, workflow = dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

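        # Each unmerged data tier should be consumed by its merge task with the WMBSMergeBySize algorithm.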
        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        dqmHarvestLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/RecoMergewrite_DQMMergedDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #11
    def testExpress(self):
        """
        _testExpress_

        Create an Express workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = ExpressWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))
        testArguments['RecoCMSSWVersion'] = "CMSSW_9_0_0"

        factory = ExpressWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Express", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        expressWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Express")
        expressWorkflow.load()
        self.assertEqual(len(expressWorkflow.outputMap.keys()), len(testArguments["Outputs"]) + 1,
                         "Error: Wrong number of WF outputs in the Express WF.")

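        # Express output filesets are keyed by the output module name plus its data tier.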
        goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                            "write_StreamExpress_ALCARECO": "ALCARECO",
                            "write_StreamExpress_DQMIO": "DQMIO"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = expressWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = expressWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_StreamExpress_ALCARECO":
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Express/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = expressWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = expressWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Express/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Express/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                         "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = {"ALCARECOStreamPromptCalibProd": "ALCAPROMPT",
                            "ALCARECOStreamTkAlMinBias": "ALCARECO"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

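        # The Express workload has two DQM harvesting tasks: end-of-run and periodic.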
        for dqmString in ["ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                          "ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged"]:
            dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s" % dqmString

            dqmWorkflow = Workflow(name="TestWorkload",
                                   task=dqmTask)
            dqmWorkflow.load()

            logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "%s/unmerged-logArchive" % dqmTask,
                             "Error: LogArchive output fileset is wrong.")
            self.assertEqual(unmergedLogArchOutput.name,
                             "%s/unmerged-logArchive" % dqmTask,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                            "write_StreamExpress_DQMIO": "DQMIO"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Express/ExpressMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Express")
        topLevelFileset.loadData()

        expressSubscription = Subscription(fileset=topLevelFileset, workflow=expressWorkflow)
        expressSubscription.loadData()

        self.assertEqual(expressSubscription["type"], "Express",
                         "Error: Wrong subscription type.")
        self.assertEqual(expressSubscription["split_algo"], "Express",
                         "Error: Wrong split algorithm. %s" % expressSubscription["split_algo"])

        alcaRecoFileset = Fileset(name="/TestWorkload/Express/unmerged-write_StreamExpress_ALCARECOALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Express",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "ExpressMerge",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(
            name="/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/merged-MergedDQMIO")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

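        # Express merge subscriptions should use the ExpressMerge split algorithm.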
        unmergedOutputs = {"write_PrimaryDataset1_FEVT": "FEVT",
                           "write_StreamExpress_DQMIO": "DQMIO"}
        for unmergedOutput, tier in unmergedOutputs.items():
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Express/ExpressMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ExpressMerge",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        goldenOutputMods = {"write_PrimaryDataset1_FEVT": "FEVT",
                            "write_StreamExpress_ALCARECO": "ALCARECO",
                            "write_StreamExpress_DQMIO": "DQMIO"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Express/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Express/ExpressCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        expressLogCollect = Fileset(name="/TestWorkload/Express/unmerged-logArchive")
        expressLogCollect.loadData()
        expressLogCollectWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Express/ExpressLogCollect")
        expressLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=expressLogCollect, workflow=expressLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_PrimaryDataset1_FEVT", "write_StreamExpress_DQMIO"]
        for goldenOutputMod in goldenOutputMods:
            expressMergeLogCollect = Fileset(
                name="/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod)
            expressMergeLogCollect.loadData()
            expressMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                      task="/TestWorkload/Express/ExpressMerge%s/Express%sMergeLogCollect" % (
                                                          goldenOutputMod, goldenOutputMod))
            expressMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=expressMergeLogCollect,
                                                  workflow=expressMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        for dqmStrings in [("ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                            "ExpressMergewrite_StreamExpress_DQMIOMergedEndOfRunDQMHarvestLogCollect"),
                           ("ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged",
                            "ExpressMergewrite_StreamExpress_DQMIOMergedPeriodicDQMHarvestLogCollect")]:
            dqmFileset = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/unmerged-logArchive" % \
                         dqmStrings[0]
            dqmHarvestLogCollect = Fileset(name=dqmFileset)
            dqmHarvestLogCollect.loadData()

            dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/%s" % dqmStrings
            dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload", task=dqmTask)
            dqmHarvestLogCollectWorkflow.load()

            logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
Example #12
    def testPromptReco(self):
        """
        _testPromptReco_

        Create a Prompt Reconstruction workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["EnableHarvesting"] = True

        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name = "TestWorkload",
                                    task = "/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                        "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                              "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

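        # Harvesting is attached to the merged write_DQM output via the end-of-run DQM harvest task.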
        dqmWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset = mergedDQMFileset, workflow = dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                  task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        dqmHarvestLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #13
0
    def testExpress(self):
        """
        _testExpress_

        Create an Express workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = ExpressWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))
        testArguments['RecoCMSSWVersion'] = "CMSSW_9_0_0"

        factory = ExpressWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "Express",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)
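        # createTopLevelFileset() and _createSubscriptionsInWMBS() install the whole Express task
        # tree (merge, cleanup, harvest and log-collect tasks) into WMBS; the checks below verify it.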

        expressWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Express")
        expressWorkflow.load()
        self.assertEqual(
            len(expressWorkflow.outputMap.keys()),
            len(testArguments["Outputs"]) + 1,
            "Error: Wrong number of WF outputs in the Express WF.")

        goldenOutputMods = {
            "write_PrimaryDataset1_FEVT": "FEVT",
            "write_StreamExpress_ALCARECO": "ALCARECO",
            "write_StreamExpress_DQMIO": "DQMIO"
        }
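        # Output fileset keys combine the output module name with its data tier,
        # e.g. "write_PrimaryDataset1_FEVT" + "FEVT".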
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = expressWorkflow.outputMap[fset][0][
                "merged_output_fileset"]
            unmergedOutput = expressWorkflow.outputMap[fset][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
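            # write_StreamExpress_ALCARECO feeds the AlcaSkim task directly, so there is no
            # ExpressMerge task for it and only its unmerged fileset is checked below.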

            if goldenOutputMod != "write_StreamExpress_ALCARECO":
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" %
                    (goldenOutputMod, tier),
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Express/unmerged-%s" % fset,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = expressWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = expressWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Express/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Express/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO"
        )
        alcaSkimWorkflow.load()
        self.assertEqual(
            len(alcaSkimWorkflow.outputMap.keys()),
            len(testArguments["AlcaSkims"]) + 1,
            "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = {
            "ALCARECOStreamPromptCalibProd": "ALCAPROMPT",
            "ALCARECOStreamTkAlMinBias": "ALCARECO"
        }
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = alcaSkimWorkflow.outputMap[fset][0][
                "merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[fset][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s"
                % fset, "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s"
                % fset, "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        for dqmString in [
                "ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
                "ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged"
        ]:
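            # The DQMIO merge task has two harvesting children: an end-of-run harvest and a periodic harvest.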
            dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s" % dqmString

            dqmWorkflow = Workflow(name="TestWorkload", task=dqmTask)
            dqmWorkflow.load()

            logArchOutput = dqmWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "%s/unmerged-logArchive" % dqmTask,
                             "Error: LogArchive output fileset is wrong.")
            self.assertEqual(unmergedLogArchOutput.name,
                             "%s/unmerged-logArchive" % dqmTask,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = {
            "write_PrimaryDataset1_FEVT": "FEVT",
            "write_StreamExpress_DQMIO": "DQMIO"
        }
        for goldenOutputMod, tier in goldenOutputMods.items():
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Express/ExpressMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap[
                "Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap[
                "Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" %
                (goldenOutputMod, tier),
                "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" %
                (goldenOutputMod, tier),
                "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" %
                goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" %
                goldenOutputMod, "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Express")
        topLevelFileset.loadData()

        expressSubscription = Subscription(fileset=topLevelFileset,
                                           workflow=expressWorkflow)
        expressSubscription.loadData()

        self.assertEqual(expressSubscription["type"], "Express",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            expressSubscription["split_algo"], "Express",
            "Error: Wrong split algorithm. %s" %
            expressSubscription["split_algo"])

        alcaRecoFileset = Fileset(
            name=
            "/TestWorkload/Express/unmerged-write_StreamExpress_ALCARECOALCARECO"
        )
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                            workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Express",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            alcaSkimSubscription["split_algo"], "ExpressMerge",
            "Error: Wrong split algorithm. %s" %
            alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(
            name=
            "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/merged-MergedDQMIO"
        )
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset=mergedDQMFileset,
                                       workflow=dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        unmergedOutputs = {
            "write_PrimaryDataset1_FEVT": "FEVT",
            "write_StreamExpress_DQMIO": "DQMIO"
        }
        for unmergedOutput, tier in unmergedOutputs.items():
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(
                name="/TestWorkload/Express/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Express/ExpressMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier,
                                             workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "ExpressMerge",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

        goldenOutputMods = {
            "write_PrimaryDataset1_FEVT": "FEVT",
            "write_StreamExpress_ALCARECO": "ALCARECO",
            "write_StreamExpress_DQMIO": "DQMIO"
        }
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(
                name="/TestWorkload/Express/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Express/ExpressCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        expressLogCollect = Fileset(
            name="/TestWorkload/Express/unmerged-logArchive")
        expressLogCollect.loadData()
        expressLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Express/ExpressLogCollect")
        expressLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=expressLogCollect,
                                     workflow=expressLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(
            name=
            "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive"
        )
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/AlcaSkimLogCollect"
        )
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                     workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = [
            "write_PrimaryDataset1_FEVT", "write_StreamExpress_DQMIO"
        ]
        for goldenOutputMod in goldenOutputMods:
            expressMergeLogCollect = Fileset(
                name="/TestWorkload/Express/ExpressMerge%s/merged-logArchive" %
                goldenOutputMod)
            expressMergeLogCollect.loadData()
            expressMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Express/ExpressMerge%s/Express%sMergeLogCollect"
                % (goldenOutputMod, goldenOutputMod))
            expressMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=expressMergeLogCollect,
                workflow=expressMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        for dqmStrings in [
            ("ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged",
             "ExpressMergewrite_StreamExpress_DQMIOMergedEndOfRunDQMHarvestLogCollect"
             ),
            ("ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged",
             "ExpressMergewrite_StreamExpress_DQMIOMergedPeriodicDQMHarvestLogCollect"
             )
        ]:
            dqmFileset = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/unmerged-logArchive" % \
                         dqmStrings[0]
            dqmHarvestLogCollect = Fileset(name=dqmFileset)
            dqmHarvestLogCollect.loadData()

            dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/%s" % dqmStrings
            dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                                    task=dqmTask)
            dqmHarvestLogCollectWorkflow.load()

            logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                         workflow=dqmHarvestLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
Example #14
0
    def testReRecoDroppingRECO(self):
        """
        _testReRecoDroppingRECO_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on. This test runs on the unmerged RECO output.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
        dataProcArguments["ProcessingString"] = "ProcString"
        dataProcArguments["ConfigCacheID"] = recoConfig
        dataProcArguments.update({"SkimName1": "SomeSkim",
                                  "SkimInput1": "RECOoutput",
                                  "Skim1ConfigCacheID": skimConfig})
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
        dataProcArguments["EnableHarvesting"] = True
        dataProcArguments["DQMConfigCacheID"] = recoConfig

        factory = ReRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
                         SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                         Merged.mergedLFNBase,
                         '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (
                                                   skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
Example #15
0
    def _commonMonteCarloTest(self):
        """
        Retrieve the workload from WMBS and test all its properties.
        """
        prodWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production")
        prodWorkflow.load()

        self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Production/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Production-SomeBlock")
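        # The fileset name above follows WMBSHelper's <workload>-<initial task>-<block> naming convention.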
        topLevelFileset.loadData()

        prodSubscription = Subscription(fileset=topLevelFileset, workflow=prodWorkflow)
        prodSubscription.loadData()

        self.assertEqual(prodSubscription["type"], "Production",
                         "Error: Wrong subscription type.")
        self.assertEqual(prodSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        for outputName in ["OutputA", "OutputB"]:
            unmergedOutput = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
            unmergedOutput.loadData()
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Production/ProductionMerge%s" % outputName)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for outputName in ["OutputA", "OutputB"]:
            unmerged = Fileset(name="/TestWorkload/Production/unmerged-%s" % outputName)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name="/TestWorkload/Production/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Production/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        for outputName in ["OutputA", "OutputB"]:
            mergeLogCollect = Fileset(name="/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName)
            mergeLogCollect.loadData()
            mergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (
                                                   outputName, outputName))
            mergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")
Example #16
0
    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   pnn='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   pnn='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testTopLevelTask, testWMBSHelper.topLevelFileset)

        testWorkload.truncate("ResubmitTestWorkload",
                              "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")

        # create the subscriptions for multiple top-level tasks (MergeTask and CleanupTask for the same block)
        for task in testWorkload.getTopLevelTask():
            testResubmitWMBSHelper = WMBSHelper(testWorkload,
                                                task.name(),
                                                "SomeBlock2",
                                                cachepath=self.workDir)
            testResubmitWMBSHelper.createTopLevelFileset()
            testResubmitWMBSHelper._createSubscriptionsInWMBS(
                task, testResubmitWMBSHelper.topLevelFileset)

        mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                                 task="/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="ResubmitTestWorkload",
            task="/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(
            name="ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset=topLevelFileset,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
Example #17
0
    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   pnn='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   pnn='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testTopLevelTask, testWMBSHelper.topLevelFileset)
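        # _createSubscriptionsInWMBS() installs the ProcessingTask subscription together with its
        # child MergeTask, CleanupTask and SkimTask workflows; the assertions below verify them.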

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(
            procWorkflow.spec,
            os.path.join(self.workDir, procWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(
            mergedProcOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
Example #18
0
class JobGroup(WMBSBase, WMJobGroup):
    """
    A group (set) of Jobs
    """
    def __init__(self,
                 subscription=None,
                 jobs=None,
                 id=-1,
                 uid=None,
                 location=None):
        WMBSBase.__init__(self)
        WMJobGroup.__init__(self, subscription=subscription, jobs=jobs)

        self.id = id
        self.lastUpdate = None
        self.uid = uid

        if location is not None:
            self.setSite(location)

        return

    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        # overwrite the base class self.output with a WMBS Fileset
        self.output = Fileset(name=makeUUID())
        self.output.create()

        if self.uid is None:
            self.uid = makeUUID()

        action = self.daofactory(classname="JobGroup.New")
        action.execute(self.uid,
                       self.subscription["id"],
                       self.output.id,
                       conn=self.getDBConn(),
                       transaction=self.existingTransaction())

        self.id = self.exists()
        self.commitTransaction(existingTransaction)

        return

    def delete(self):
        """
        Remove a jobgroup from WMBS
        """
        deleteAction = self.daofactory(classname="JobGroup.Delete")
        deleteAction.execute(id=self.id,
                             conn=self.getDBConn(),
                             transaction=self.existingTransaction())

        return

    def exists(self):
        """
        Check whether the jobgroup exists.  Use the id if it is set, otherwise
        fall back to the uid.  Returns the database id.
        """
        if self.id != -1:
            action = self.daofactory(classname="JobGroup.ExistsByID")
            result = action.execute(id=self.id,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        else:
            action = self.daofactory(classname="JobGroup.Exists")
            result = action.execute(uid=self.uid,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        return result

    def load(self):
        """
        _load_

        Load all meta data associated with the JobGroup.  This includes the
        JobGroup id, uid, last_update time, subscription id and output fileset
        id.  Either the JobGroup id or uid must be specified for this to work.
        """
        existingTransaction = self.beginTransaction()

        if self.id > 0:
            loadAction = self.daofactory(classname="JobGroup.LoadFromID")
            result = loadAction.execute(self.id,
                                        conn=self.getDBConn(),
                                        transaction=self.existingTransaction())
        else:
            loadAction = self.daofactory(classname="JobGroup.LoadFromUID")
            result = loadAction.execute(self.uid,
                                        conn=self.getDBConn(),
                                        transaction=self.existingTransaction())

        self.id = result["id"]
        self.uid = result["uid"]
        self.lastUpdate = result["last_update"]

        self.subscription = Subscription(id=result["subscription"])
        self.subscription.load()

        self.output = Fileset(id=result["output"])
        self.output.load()

        self.jobs = []
        self.commitTransaction(existingTransaction)
        return

    def loadData(self):
        """
        _loadData_

        Load all data that is associated with the jobgroup.  This includes
        loading all the subscription information, the output fileset
        information and all the jobs that are associated with the group.
        """
        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid == None:
            self.load()

        self.subscription.loadData()
        self.output.loadData()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        self.jobs = []
        self.newjobs = []

        for jobID in result:
            newJob = Job(id=jobID["id"])
            newJob.loadData()
            self.add(newJob)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def commit(self):
        """
        _commit_

        Write any new jobs to the database, creating them in the database if
        necessary.
        """
        existingTransaction = self.beginTransaction()

        if self.id == -1:
            self.create()

        for j in self.newjobs:
            j.create(group=self)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def setSite(self, site_name=None):
        """
        Updates the jobGroup with a site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.SetSite")
        result = action.execute(site_name=site_name,
                                jobGroupID=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

        return result

    def getSite(self):
        """
        Returns the site_name for the jobGroup from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetSite")
        result = action.execute(jobGroupID=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

        return result

    def listJobIDs(self):
        """
        Returns a list of the job IDs in this group.
        Useful when the loading of jobGroups is threaded and running a full
        loadData() for every group would be too heavy.
        """

        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid == None:
            self.load()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        jobIDList = []

        for jobID in result:
            jobIDList.append(jobID["id"])

        self.commitTransaction(existingTransaction)
        return jobIDList

    def commitBulk(self):
        """
        Creates jobs in a group instead of singly, as is done in jobGroup.commit()
        """

        myThread = threading.currentThread()

        if self.id == -1:
            myThread.transaction.begin()
            #existingTransaction = self.beginTransaction()
            self.create()
            #self.commitTransaction(existingTransaction)
            myThread.transaction.commit()

        existingTransaction = self.beginTransaction()

        listOfJobs = []
        for job in self.newjobs:
            #First do all the header stuff
            if job["id"] != None:
                continue

            job["jobgroup"] = self.id

            if job["name"] == None:
                job["name"] = makeUUID()

            listOfJobs.append(job)

        bulkAction = self.daofactory(classname="Jobs.New")
        result = bulkAction.execute(jobList=listOfJobs)

        #Use the results of the bulk commit to get the jobIDs
        fileDict = {}
        for job in listOfJobs:
            job['id'] = result[job['name']]
            fileDict[job['id']] = []
            for file in job['input_files']:
                fileDict[job['id']].append(file['id'])

        maskAction = self.daofactory(classname="Masks.New")
        maskAction.execute(jobList = listOfJobs, conn = self.getDBConn(), \
                           transaction = self.existingTransaction())

        fileAction = self.daofactory(classname="Jobs.AddFiles")
        fileAction.execute(jobDict = fileDict, conn = self.getDBConn(), \
                           transaction = self.existingTransaction())

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)

        return

    def getLocationsForJobs(self):
        """
        Gets a list of the locations that jobs can run at
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetLocationsForJobs")
        result = action.execute(id=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

        return result

    def __str__(self):
        """
        __str__

        Return a string with some information about the jobGroup,
        as if jobGroup inherited from dict()
        """

        d = {
            'id': self.id,
            'uid': self.uid,
            'subscription': self.subscription,
            'output': self.output,
            'jobs': self.jobs,
            'newjobs': self.newjobs
        }

        return str(d)
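
Taken together, the methods above give the WMBS-backed JobGroup its lifecycle: create() makes the group and its output fileset, commit()/commitBulk() persist new jobs, and listJobIDs()/loadData() read them back. The snippet below is a minimal usage sketch, not part of the original example; it assumes an initialised WMBS database connection (as the surrounding tests set up), the usual WMCore.WMBS import paths, a pre-existing workflow/fileset/subscription, and the Job(name=..., files=...) constructor signature.

# Usage sketch (not from the original source): persist a batch of new jobs
# in bulk through the JobGroup shown above.  Names, import paths and the
# Job constructor arguments are assumptions for illustration.
from WMCore.WMBS.Fileset import Fileset
from WMCore.WMBS.Workflow import Workflow
from WMCore.WMBS.Subscription import Subscription
from WMCore.WMBS.Job import Job
from WMCore.WMBS.JobGroup import JobGroup

fileset = Fileset(name="SomeInputFileset")
fileset.loadData()
workflow = Workflow(name="TestWorkload", task="/TestWorkload/SomeTask")
workflow.load()
subscription = Subscription(fileset=fileset, workflow=workflow)
subscription.loadData()

# Group a batch of new jobs and write them in one shot; commitBulk() creates
# the group (and its output fileset) first if it does not exist yet, then
# inserts the jobs, their masks and their input-file associations in bulk.
group = JobGroup(subscription=subscription)
for i in range(5):
    group.add(Job(name="job-%d" % i, files=[]))
group.commitBulk()

print(group.listJobIDs())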
Example #19
    def testDataProcessing(self):
        """
        _testDataProcessing_

        Create a data processing workflow and verify it installs into WMBS
        correctly.
        """
        testWorkload = dataProcessingWorkload("TestWorkload", getTestArguments())
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")        

        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/DataProcessingoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/DataProcessingoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")                

        return
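
The assertions in this test (and in the examples that follow) repeat one pattern: pull the merged and unmerged filesets for each output module out of Workflow.outputMap, load them, and compare their names. A small helper along these lines would keep that pattern in one place; it is a sketch, not part of the original test suite, and only uses the outputMap structure exercised above.

    def loadOutputFilesetNames(self, workflow):
        """
        Sketch of a helper: return {outputModule: (mergedName, unmergedName)}
        for every entry in an already-loaded WMBS Workflow.outputMap.
        """
        names = {}
        for module, mappings in workflow.outputMap.items():
            mergedFileset = mappings[0]["merged_output_fileset"]
            unmergedFileset = mappings[0]["output_fileset"]
            mergedFileset.loadData()
            unmergedFileset.loadData()
            names[module] = (mergedFileset.name, unmergedFileset.name)
        return names

    # e.g. inside testDataProcessing:
    #   names = self.loadOutputFilesetNames(procWorkflow)
    #   self.assertEqual(names["outputRECORECO"][1],
    #                    "/TestWorkload/DataProcessing/unmerged-outputRECORECO")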
Example #20
    def _generationTaskTest(self):
        # retrieve task from the installed workflow
        genTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation")
        genTask.load()
        self.assertEqual(len(genTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")

        # output modules
        goldenOutputMods = ["OutputA"]

        for o in goldenOutputMods:
            mergedOutput = genTask.outputMap[o][0]["merged_output_fileset"]
            unmergedOutput = genTask.outputMap[o][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
                "Error: Merged output fileset is wrong: %s" % mergedOutput.name,
            )
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Generation/unmerged-%s" % o,
                "Error: Unmerged output fileset is wrong.",
            )

        logArchOutput = genTask.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = genTask.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Generation/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.",
        )
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Generation/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.",
        )

        for o in goldenOutputMods:
            mergeTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationMerge%s" % o)
            mergeTask.load()
            self.assertEqual(len(mergeTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")
            mergedMergeOutput = mergeTask.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeTask.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()
            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
                "Error: Merged output fileset is wrong.",
            )
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
                "Error: Unmerged output fileset is wrong.",
            )
            logArchOutput = mergeTask.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeTask.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()
            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
                "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name,
            )
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
                "Error: LogArchive output fileset is wrong.",
            )

        topLevelFileset = Fileset(name="TestWorkload-Generation-SomeBlock")
        topLevelFileset.loadData()
        prodSubscription = Subscription(fileset=topLevelFileset, workflow=genTask)
        prodSubscription.loadData()
        self.assertEqual(prodSubscription["type"], "Production", "Error: Wrong subscription type.")
        self.assertEqual(prodSubscription["split_algo"], "EventBased", "Error: Wrong split algo.")

        for o in goldenOutputMods:
            unmergedOutput = Fileset(name="/TestWorkload/Generation/unmerged-%s" % o)
            unmergedOutput.loadData()
            mergeTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationMerge%s" % o)
            mergeTask.load()
            mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeTask)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"],
                "ParentlessMergeBySize",
                "Error: Wrong split algo: %s" % mergeSubscription["split_algo"],
            )

        for o in goldenOutputMods:
            unmerged = Fileset(name="/TestWorkload/Generation/unmerged-%s" % o)
            unmerged.loadData()
            cleanupTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationCleanupUnmerged%s" % o)
            cleanupTask.load()
            cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupTask)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.")

        genLogCollect = Fileset(name="/TestWorkload/Generation/unmerged-logArchive")
        genLogCollect.loadData()
        genLogCollectTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenLogCollect")
        genLogCollectTask.load()
        logCollectSub = Subscription(fileset=genLogCollect, workflow=genLogCollectTask)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")

        for o in goldenOutputMods:
            mergeLogCollect = Fileset(name="/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o)
            mergeLogCollect.loadData()
            mergeLogCollectTask = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Generation/GenerationMerge%s/Generation%sMergeLogCollect" % (o, o),
            )
            mergeLogCollectTask.load()
            logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectTask)
            logCollectSub.loadData()
            self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")
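
The expected names in this check all follow one convention: unmerged output sits directly under the producing task, merged output under the corresponding merge task, and logArchive never gets its own merge task. A sketch of that convention as a standalone helper (illustrative only, not WMCore API):

def expectedFilesetNames(taskPath, taskName, module):
    """
    Sketch of the fileset-naming convention exercised above:
      merged:   <taskPath>/<taskName>Merge<module>/merged-Merged
      unmerged: <taskPath>/unmerged-<module>
    logArchive has no merge task, so both names point at unmerged-logArchive.
    """
    unmerged = "%s/unmerged-%s" % (taskPath, module)
    if module == "logArchive":
        return unmerged, unmerged
    merged = "%s/%sMerge%s/merged-Merged" % (taskPath, taskName, module)
    return merged, unmerged

# expectedFilesetNames("/TestWorkload/Generation", "Generation", "OutputA")
# -> ("/TestWorkload/Generation/GenerationMergeOutputA/merged-Merged",
#     "/TestWorkload/Generation/unmerged-OutputA")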
Example #21
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on.  We'll only test the skims here.
        """
        skimConfig = self.injectSkimConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                             "SkimInput": "outputRECORECO",
                                             "SkimSplitAlgo": "FileBased",
                                             "SkimSplitArgs": {"files_per_job": 1,
                                                               "include_parents": True},
                                             "ConfigCacheID": skimConfig,
                                             "Scenario": None}]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
            logCollectSub.loadData()
            
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                             "Error: Wrong split algo.")

        return
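
Every subscription check in this test follows the same four steps: load the fileset by name, load the workflow by task path, build the Subscription from the pair, and loadData() it before asserting on type and split algorithm. A hedged helper bundling those steps (a sketch using the same WMBS classes as above, not part of the original code):

    def loadSubscriptionFor(self, filesetName, taskPath, workflowName="TestWorkload"):
        """
        Sketch of a helper: load the subscription connecting the named fileset
        to the given task of the workflow, exactly as the assertions above do.
        """
        fileset = Fileset(name=filesetName)
        fileset.loadData()
        workflow = Workflow(name=workflowName, task=taskPath)
        workflow.load()
        subscription = Subscription(fileset=fileset, workflow=workflow)
        subscription.loadData()
        return subscription

    # e.g.:
    #   sub = self.loadSubscriptionFor(
    #       "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged",
    #       "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim")
    #   self.assertEqual(sub["type"], "Skim")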
Example #23
    def _checkTask(self, task, taskConf):
        """
        _checkTask_
        
        Give the provided task instance the once-over: check that parentage,
        output filesets and subscriptions are set up correctly.
        
        """
        if taskConf.has_key("InputTask"):
            inpTask = taskConf['InputTask']
            inpMod = taskConf['InputFromOutputModule']
            inpTaskPath = task.getPathName()
            inpTaskPath = inpTaskPath.replace(task.name(), "")
            inpTaskPath += "cmsRun1"
            self.assertEqual(task.data.input.inputStep, inpTaskPath)

        
        workflow = Workflow(name = self.workload.name(),
                            task = task.getPathName())
        workflow.load()
        mods = outputModuleList(task)

        for mod in mods:
            filesets = workflow.outputMap[mod][0]
            merged = filesets['merged_output_fileset']
            unmerged = filesets['output_fileset']
            
            merged.loadData()
            unmerged.loadData()
            mergedset = task.getPathName() + "/" + task.name() + "Merge" + mod + "/merged-Merged"
            if mod == "logArchive":
                mergedset = task.getPathName() + "/unmerged-" + mod
            unmergedset = task.getPathName() + "/unmerged-" + mod
            
            self.failUnless(mergedset == merged.name)
            self.failUnless(unmergedset == unmerged.name)

            if mod == "logArchive": continue
            
            mergeTask = task.getPathName() + "/" + task.name() + "Merge" + mod
            
            mergeWorkflow = Workflow(name = self.workload.name(),
                                     task = mergeTask)
            mergeWorkflow.load()
            if not mergeWorkflow.outputMap.has_key("Merged"):
                msg = "Merge workflow does not contain a Merged output key"
                self.fail(msg)
            mrgFileset = mergeWorkflow.outputMap['Merged'][0]
            mergedFS = mrgFileset['merged_output_fileset']
            unmergedFS = mrgFileset['output_fileset']
            mergedFS.loadData()
            unmergedFS.loadData()
            self.assertEqual(mergedFS.name, mergedset)
            self.assertEqual(unmergedFS.name, mergedset)
            
            mrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['merged_output_fileset']
            umrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['output_fileset']
            mrgLogArch.loadData()
            umrgLogArch.loadData()
            
            archName = task.getPathName() + "/" + task.name() + "Merge" + mod + "/merged-logArchive"
            
            self.assertEqual(mrgLogArch.name, archName)
            self.assertEqual(umrgLogArch.name, archName)

        #
        # test subscriptions made by this task if it is the first task
        #
        if taskConf.has_key("InputDataset") or taskConf.has_key("PrimaryDataset"):
            taskFileset = Fileset(name = "%s-%s-SomeBlock" % (self.workload.name(), task.name() ))
            taskFileset.loadData()

            taskSubscription = Subscription(fileset = taskFileset, workflow = workflow)
            taskSubscription.loadData()
        
            if taskConf.has_key("PrimaryDataset"):
                # generator type
                self.assertEqual(taskSubscription["type"], "Production",
                                 "Error: Wrong subscription type.")
                self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgorithm"],
                                 "Error: Wrong split algo.")
            if taskConf.has_key("InputDataset"):
                # processor type
                self.assertEqual(taskSubscription["type"], "Processing", "Wrong subscription type for processing first task")
                self.assertEqual(taskSubscription["split_algo"], taskConf['SplittingAlgorithm'], "Splitting algo mismatch")
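
The branch at the end of _checkTask encodes a simple rule for the first task of a workload: a PrimaryDataset marks a generator task (Production subscription), an InputDataset marks a processing task (Processing subscription), and in both cases the split algorithm must match the task's SplittingAlgorithm. A standalone sketch of that rule (illustrative only, not part of the original test):

def expectedFirstTaskSubscription(taskConf):
    """
    Sketch of the rule checked above: map the first task's configuration to
    the subscription type and split algorithm WMBS is expected to hold for it.
    """
    if "PrimaryDataset" in taskConf:
        return "Production", taskConf["SplittingAlgorithm"]
    if "InputDataset" in taskConf:
        return "Processing", taskConf["SplittingAlgorithm"]
    return None, None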
Example #24
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly.
        """
        arguments = getTestArguments()
        arguments["ProcConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'lheproduction')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Production",
                         "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")                

        return
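
The task paths asserted throughout this test follow a fixed convention derived from the processing task name: each output module gets a Merge task, a CleanupUnmerged task, and a MergeLogCollect task nested under the merge. A sketch of that convention (illustrative helper, not WMCore API):

def relatedTaskPaths(procPath, procName, module):
    """
    Sketch of the task-path convention asserted above for one output module
    of a processing task.
    """
    merge = "%s/%sMerge%s" % (procPath, procName, module)
    cleanup = "%s/%sCleanupUnmerged%s" % (procPath, procName, module)
    logCollect = "%s/%s%sMergeLogCollect" % (merge, procName, module)
    return merge, cleanup, logCollect

# relatedTaskPaths("/TestWorkload/MonteCarloFromGEN",
#                  "MonteCarloFromGEN", "outputRECORECO")
# -> ("/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO",
#     "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmergedoutputRECORECO",
#     "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/"
#     "MonteCarloFromGENoutputRECORECOMergeLogCollect")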
Example #25
    def testPromptRecoWithSkims(self):
        """
        _testPromptRecoWithSkims_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        self.setupPromptSkimConfigObject()
        testArguments = getTestArguments()
        testArguments["PromptSkims"] = [self.promptSkim]
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["CouchDBName"] = "promptreco_t"
        testArguments["EnvPath"] = os.environ.get("EnvPath", None)
        testArguments["BinPath"] = os.environ.get("BinPath", None)

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name = "TestWorkload",
                                    task = "/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                        "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                              "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        promptSkimWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1")
        promptSkimWorkflow.load()

        self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                            "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                            "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedRecoFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
        mergedRecoFileset.loadData()

        promptSkimSubscription = Subscription(fileset = mergedRecoFileset, workflow = promptSkimWorkflow)
        promptSkimSubscription.loadData()

        self.assertEqual(promptSkimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(promptSkimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algorithm. %s" % promptSkimSubscription["split_algo"])

        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        unmergedOutputs = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                           "fakeSkimOut4", "fakeSkimOut5"]
        for unmergedOutput in unmergedOutputs:
            unmergedPromptSkim = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput)
            unmergedPromptSkim.loadData()
            promptSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % unmergedOutput)
            promptSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedPromptSkim, workflow = promptSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                           "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s" % unmergedOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm. %s" % cleanupSubscription["split_algo"])

        recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        promptSkimLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive")
        promptSkimLogCollect.loadData()
        promptSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect")
        promptSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = promptSkimLogCollect, workflow = promptSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                           "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            promptSkimMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod)
            promptSkimMergeLogCollect.loadData()
            promptSkimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                    task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            promptSkimMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = promptSkimMergeLogCollect, workflow = promptSkimMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        return
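
The Reco, AlcaSkim and TestSkim1 blocks above repeat the same load-and-compare pattern for every output module. Below is a minimal sketch of a TestCase helper that could factor this out; the helper name and signature are hypothetical, it is not part of the original test, and it deliberately ignores special cases such as write_ALCARECO, which has no merge task of its own.

    def assertOutputFilesets(self, workflow, taskPath, mergePrefix, outputMods):
        """
        Hypothetical helper, not part of the original test: verify the
        merged/unmerged fileset names for each output module, using the
        naming convention asserted literally above:
          merged   -> <taskPath>/<mergePrefix><mod>/merged-Merged
          unmerged -> <taskPath>/unmerged-<mod>
        """
        for mod in outputMods:
            mergedOutput = workflow.outputMap[mod][0]["merged_output_fileset"]
            unmergedOutput = workflow.outputMap[mod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name,
                             "%s/%s%s/merged-Merged" % (taskPath, mergePrefix, mod),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "%s/unmerged-%s" % (taskPath, mod),
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

With such a helper, the AlcaSkim checks above would reduce to a single call like self.assertOutputFilesets(alcaSkimWorkflow, "/TestWorkload/Reco/AlcaSkim", "AlcaSkimMerge", goldenOutputMods).
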
Example #26
0
    def testRepack(self):
        """
        _testRepack_

        Create a Repack workflow and verify it installs into WMBS correctly.
        """
        testArguments = RepackWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))

        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Repack", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        repackWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/Repack")
        repackWorkflow.load()
        self.assertEqual(len(repackWorkflow.outputMap.keys()), len(testArguments["Outputs"]) + 1,
                         "Error: Wrong number of WF outputs in the Repack WF.")

        goldenOutputMods = {"write_PrimaryDataset1_RAW": "RAW", "write_PrimaryDataset2_RAW": "RAW"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_PrimaryDataset1_RAW":
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Repack/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod, tier in goldenOutputMods.items():
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 3,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Repack")
        topLevelFileset.loadData()

        repackSubscription = Subscription(fileset=topLevelFileset, workflow=repackWorkflow)
        repackSubscription.loadData()

        self.assertEqual(repackSubscription["type"], "Repack",
                         "Error: Wrong subscription type.")
        self.assertEqual(repackSubscription["split_algo"], "Repack",
                         "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])

        unmergedOutputs = {"write_PrimaryDataset1_RAW": "RAW", "write_PrimaryDataset2_RAW": "RAW"}
        for unmergedOutput, tier in unmergedOutputs.items():
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
        repackLogCollect.loadData()
        repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Repack/LogCollect")
        repackLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=repackLogCollect, workflow=repackLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        for goldenOutputMod, tier in goldenOutputMods.items():
            repackMergeLogCollect = Fileset(
                name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
            repackMergeLogCollect.loadData()
            repackMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                     task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" % (
                                                         goldenOutputMod, goldenOutputMod))
            repackMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=repackMergeLogCollect, workflow=repackMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        return
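
Note how the Repack assertions build their outputMap keys and fileset paths: the key is the output-module name concatenated with its data tier, and the merge task keeps the tier in its Merged<tier> output. A small sketch of that naming convention, derived only from the paths asserted above (the helper itself is hypothetical):

def repackFilesetNames(outputModule, tier, task="/TestWorkload/Repack"):
    """
    Return (unmergedName, mergedName) for a Repack output module, mirroring
    the literal paths asserted in testRepack above. Illustration only.
    """
    fset = outputModule + tier  # outputMap key, e.g. "write_PrimaryDataset1_RAWRAW"
    unmergedName = "%s/unmerged-%s" % (task, fset)
    mergedName = "%s/RepackMerge%s/merged-Merged%s" % (task, outputModule, tier)
    return unmergedName, mergedName

For example, repackFilesetNames("write_PrimaryDataset1_RAW", "RAW") yields the unmerged and merged names checked in the first loop of the test.
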
Example #27
0
    def verifyDiscardRAW(self):
        """
        _verifyDiscardRAW_

        Verify that a workflow that discards the RAW was installed into WMBS
        correctly.
        """
        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-DQMoutput")
        stepTwoUnmergedDQMFileset.loadData()
        stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RECODEBUGoutput")
        stepTwoUnmergedRECOFileset.loadData()
        stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-Merged")
        stepTwoMergedDQMFileset.loadData()
        stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-Merged")
        stepTwoMergedRECOFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-logArchive")
        stepTwoMergeDQMLogArchiveFileset.loadData()
        stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-logArchive")
        stepTwoMergeRECOLogArchiveFileset.loadData()

        stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc")
        stepTwoWorkflow.load()
        self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedDQMoutput")
        stepTwoCleanupDQMWorkflow.load()
        self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoCleanupDQMSub.loadData()
        self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                              task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRECODEBUGoutput")
        stepTwoCleanupRECOWorkflow.load()
        self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoCleanupRECOSub.loadData()
        self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/LogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput")
        stepTwoMergeRECOWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoMergeRECOSub.loadData()
        self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput")
        stepTwoMergeDQMWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoMergeDQMSub.loadData()
        self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        return
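
Every subscription check in these examples follows the same WMBS lookup sequence: load the Workflow by (name, task), load the Fileset by name, then load the Subscription that joins them and inspect its type and split algorithm. Below is a compact sketch of that sequence as a hypothetical TestCase helper; it assumes the same Fileset, Workflow and Subscription imports the tests already use.

    def loadSubscription(self, filesetName, taskPath, workloadName="TestWorkload"):
        """
        Hypothetical helper: resolve the WMBS subscription that joins the
        given fileset to the given workflow task, as done repeatedly above.
        """
        fileset = Fileset(name=filesetName)
        fileset.loadData()
        workflow = Workflow(name=workloadName, task=taskPath)
        workflow.load()
        subscription = Subscription(fileset=fileset, workflow=workflow)
        subscription.loadData()
        return subscription

A caller could then assert, for instance, that loadSubscription("/TestWorkload/StepOneProc/unmerged-logArchive", "/TestWorkload/StepOneProc/LogCollect")["type"] equals "LogCollect".
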
Example #28
0
    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        Verify that a truncated (resubmission) workload installs into WMBS
        correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                                   ceName = 'site1', plugin = "TestPlugin")
        resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                                   ceName = 'site2', plugin = "TestPlugin")        

        testWorkload = self.createTestWMSpec()
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
        testWMBSHelper.createSubscription()

        testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")
        testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath = self.workDir)
        testResubmitWMBSHelper.createSubscription()

        mergeWorkflow = Workflow(name = "ResubmitTestWorkload",
                                 task = "/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name = "ResubmitTestWorkload",
                                 task = "/ResubmitTestWorkload/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")        

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name = "ResubmitTestWorkload",
                                task = "/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")        

        skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
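
The top-level fileset name in this test, "ResubmitTestWorkload-MergeTask-SomeBlock2", follows the same convention seen in the other examples ("TestWorkload-Reco-SomeBlock", "TestWorkload-Repack", and so on): workload name, top-level task, and the block name when one is given. A sketch of that observed convention (illustration only; this helper is not part of WMBSHelper):

def topLevelFilesetName(workloadName, topLevelTask, blockName=None):
    """
    Build the top-level WMBS fileset name used across these tests:
    "<workload>-<topLevelTask>", plus "-<block>" when a block is supplied.
    Illustration of the observed naming only.
    """
    name = "%s-%s" % (workloadName, topLevelTask)
    if blockName is not None:
        name += "-%s" % blockName
    return name
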
Example #29
0
    def testStoreResults(self):
        """
        _testStoreResults_

        Create a StoreResults workflow and verify it installs into WMBS
        correctly.
        """
        arguments = StoreResultsWorkloadFactory.getTestArguments()
        arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

        factory = StoreResultsWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", arguments)

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "StoreResults",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        testWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/StoreResults")
        testWorkflow.load()

        self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["Merged"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = testWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s." %
                unmergedOutput.name)

        logArchOutput = testWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=testWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"],
                         "ParentlessMergeBySize", "Error: Wrong split algo.")

        return
Example #30
0
    def testDataProcessing(self):
        """
        _testDataProcessing_

        Create a data processing workflow and verify it installs into WMBS
        correctly. Check that we can drop an output module.
        """
        testArgs = getTestArguments()
        testArgs['TransientOutputModules'] = ['RECOoutput']
        testWorkload = dataProcessingWorkload("TestWorkload", testArgs)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["RECOoutput", "ALCARECOoutput"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod in testArgs["TransientOutputModules"]:
                self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            else:
                self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            if goldenOutputMod in testArgs["TransientOutputModules"]:
                # No merge for this output module
                continue
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
        # No merge workflow should exist in WMBS
        self.assertRaises(IndexError, recoMergeWorkflow.load)

        unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        for procOutput in ["RECOoutput", "ALCARECOoutput"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
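
The TransientOutputModules branch above encodes the key expectation of this test: a transient output module gets no merge workflow, so its merged fileset slot simply points back at the unmerged fileset. A short sketch of that expectation, reflecting only the names asserted above (the helper itself is hypothetical):

def expectedProcessingFilesets(outputModule, transientModules,
                               task="/TestWorkload/DataProcessing"):
    """
    Return (mergedName, unmergedName) for a DataProcessing output module.
    Transient modules are never merged, so both names collapse to the
    unmerged fileset. Illustration only.
    """
    unmergedName = "%s/unmerged-%s" % (task, outputModule)
    if outputModule in transientModules:
        mergedName = unmergedName
    else:
        mergedName = "%s/DataProcessingMerge%s/merged-Merged" % (task, outputModule)
    return mergedName, unmergedName
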
Example #31
0
    def testAnalysis(self):
        """
        _testAnalysis_

        Create an Analysis workflow and verify it installs into WMBS correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchUrl"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "analysis_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
        defaultArguments["ProcessingVersion"] = 'v1'

        analysisProcessingFactory = AnalysisWorkloadFactory()
        testWorkload = analysisProcessingFactory("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

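        # Inject the workload into WMBS: create the top-level fileset and subscribe the Analysis task to it.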
        testWMBSHelper = WMBSHelper(testWorkload, "Analysis", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)
        procWorkflow = Workflow(name = "TestWorkload",
                              task = "/TestWorkload/Analysis")
        procWorkflow.load()
        self.assertEqual(len(procWorkflow.outputMap.keys()), 2,
                                  "Error: Wrong number of WF outputs.")

        # Analysis does not actually have a merge task
        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/Analysis/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Analysis/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

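        # Analysis writes straight to the unmerged output fileset, so the merged and unmerged slots point to the same fileset.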
        output = procWorkflow.outputMap["output"][0]["output_fileset"]
        mergedOutput = procWorkflow.outputMap["output"][0]["merged_output_fileset"]
        output.loadData()
        mergedOutput.loadData()
        self.assertEqual(output.name, "/TestWorkload/Analysis/unmerged-output",
                         "Error: Unmerged output fileset is wrong: " + output.name)
        self.assertEqual(mergedOutput.name, "/TestWorkload/Analysis/unmerged-output",
                         "Error: Merged output fileset is wrong: " + mergedOutput.name)

        topLevelFileset = Fileset(name = "TestWorkload-Analysis-SomeBlock")
        topLevelFileset.loadData()
        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "Analysis",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")


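        # The LogCollect subscription runs over the unmerged logArchive fileset with MinFileBased splitting.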
        procLogCollect = Fileset(name = "/TestWorkload/Analysis/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Analysis/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
Example #32
    def testDependentReDigi(self):
        """
        _testDependentReDigi_

        Verify that a dependent ReDigi workflow that keeps and stages out
        RAW data is created and installed into WMBS correctly.
        """
        defaultArguments = ReDigiWorkloadFactory.getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = injectReDigiConfigs(self.configDatabase)
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]
        defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput"
        defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput"

        factory = ReDigiWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

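        # Load every fileset that subscription creation should have produced for the three steps and their merge tasks.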
        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
        stepOneUnmergedRAWFileset.loadData()
        stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged")
        stepOneMergedRAWFileset.loadData()
        stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
        stepOneLogArchiveFileset.loadData()
        stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive")
        stepOneMergeLogArchiveFileset.loadData()

        stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput")
        stepTwoUnmergedDQMFileset.loadData()
        stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput")
        stepTwoUnmergedRECOFileset.loadData()
        stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged")
        stepTwoMergedDQMFileset.loadData()
        stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged")
        stepTwoMergedRECOFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive")
        stepTwoMergeDQMLogArchiveFileset.loadData()
        stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive")
        stepTwoMergeRECOLogArchiveFileset.loadData()

        stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule")
        stepThreeUnmergedAODFileset.loadData()
        stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged")
        stepThreeMergedAODFileset.loadData()
        stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive")
        stepThreeLogArchiveFileset.loadData()

        stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive")
        stepThreeMergeLogArchiveFileset.loadData()

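        # Step one processing task: reads the input block and writes RAWDEBUG output plus the log archive.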
        stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc")
        stepOneWorkflow.load()
        self.assertEqual(stepOneWorkflow.wfType, 'reprocessing')
        self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id, stepOneUnmergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")

        for outputMod in stepOneWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset)
        stepOneSub.loadData()
        self.assertEqual(stepOneSub["type"], "Processing",
                         "Error: Step one sub has wrong type.")

        stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput")
        stepOneCleanupWorkflow.load()
        self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneCleanupSub.loadData()
        self.assertEqual(stepOneCleanupSub["type"], "Cleanup",
                         "Error: Step one sub has wrong type.")

        stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/LogCollect")
        stepOneLogCollectWorkflow.load()
        self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow, fileset = stepOneLogArchiveFileset)
        stepOneLogCollectSub.loadData()
        self.assertEqual(stepOneLogCollectSub["type"], "LogCollect",
                         "Error: Step one sub has wrong type.")

        stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput")
        stepOneMergeWorkflow.load()
        self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        for outputMod in stepOneMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneMergeSub.loadData()
        self.assertEqual(stepOneMergeSub["type"], "Merge",
                         "Error: Step one sub has wrong type.")

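        # Step two runs on the merged RAWDEBUG output and produces RECODEBUG and DQM output.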
        stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc")
        stepTwoWorkflow.load()
        self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput")
        stepTwoCleanupDQMWorkflow.load()
        self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoCleanupDQMSub.loadData()
        self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                              task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput")
        stepTwoCleanupRECOWorkflow.load()
        self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoCleanupRECOSub.loadData()
        self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput")
        stepTwoMergeRECOWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoMergeRECOSub.loadData()
        self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput")
        stepTwoMergeDQMWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoMergeDQMSub.loadData()
        self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

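        # Step three runs on the merged RECODEBUG output and produces AOD output.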
        stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                     task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc")
        stepThreeWorkflow.load()
        self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")
        self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepThreeUnmergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset)
        stepThreeSub.loadData()
        self.assertEqual(stepThreeSub["type"], "Processing",
                         "Error: Step three sub has wrong type.")
        for outputMod in stepThreeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule")
        stepThreeCleanupWorkflow.load()
        self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeCleanupSub.loadData()
        self.assertEqual(stepThreeCleanupSub["type"], "Cleanup",
                         "Error: Step three sub has wrong type.")

        stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                               task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect")
        stepThreeLogCollectWorkflow.load()
        self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow, fileset = stepThreeLogArchiveFileset)
        stepThreeLogCollectSub.loadData()
        self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect",
                         "Error: Step three sub has wrong type.")

        stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule")
        stepThreeMergeWorkflow.load()
        self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")
        self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeMergeSub.loadData()
        self.assertEqual(stepThreeMergeSub["type"], "Merge",
                         "Error: Step three sub has wrong type.")
        for outputMod in stepThreeMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        return
Example #33
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
        dataProcArguments["ProcessingString"] = "ProcString"
        dataProcArguments["ConfigCacheID"] = recoConfig
        dataProcArguments.update({"SkimName1": "SomeSkim",
                                  "SkimInput1": "RECOoutput",
                                  "Skim1ConfigCacheID": skimConfig})
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["EnableHarvesting"] = True
        dataProcArguments["DQMConfigCacheID"] = recoConfig

        factory = ReRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                         tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                         Merged.mergedLFNBase,
                         '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

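        # The top-level DataProcessing workflow should expose RECO, DQM and logArchive outputs.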
        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["RECOoutput", "DQMoutput"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        unmergedDqm = Fileset(name = "/TestWorkload/DataProcessing/unmerged-DQMoutput")
        unmergedDqm.loadData()
        dqmMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
        dqmMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedDqm, workflow = dqmMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        for procOutput in ["RECOoutput", "DQMoutput"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

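        # The skim task hangs off the merged RECO output of the processing step.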
        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

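        # Each skim output module gets its own Merge and Cleanup subscriptions plus a merge LogCollect.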
        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

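        # DQM harvesting runs over the merged DQM output using the Harvest splitting algorithm.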
        dqmWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
        topLevelFileset.loadData()

        dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

        dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #34
    def verifyKeepAOD(self):
        """
        _verifyKeepAOD_

        Verify that a workflow that only produces AOD in a single step was
        installed correctly into WMBS.
        """
        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepTwoUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-aodOutputModule")
        stepTwoUnmergedAODFileset.loadData()
        stepTwoMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-Merged")
        stepTwoMergedAODFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeAODLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-logArchive")
        stepTwoMergeAODLogArchiveFileset.loadData()

        stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc")
        stepTwoWorkflow.load()
        self.assertTrue("aodOutputModule" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepTwoMergedAODFileset.id,
                         "Error: AOD output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepTwoUnmergedAODFileset.id,
                         "Error: AOD output fileset is wrong.")
        stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoCleanupAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedaodOutputModule")
        stepTwoCleanupAODWorkflow.load()
        self.assertEqual(len(stepTwoCleanupAODWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupAODSub = Subscription(workflow = stepTwoCleanupAODWorkflow, fileset = stepTwoUnmergedAODFileset)
        stepTwoCleanupAODSub.loadData()
        self.assertEqual(stepTwoCleanupAODSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/LogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule")
        stepTwoMergeAODWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeAODWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeAODWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeAODLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeAODLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        stepTwoMergeAODSub = Subscription(workflow = stepTwoMergeAODWorkflow, fileset = stepTwoUnmergedAODFileset)
        stepTwoMergeAODSub.loadData()
        self.assertEqual(stepTwoMergeAODSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeAODWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeAODWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        return
Example #35
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on.  We'll test the skims and DQMHarvest here.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments['ProcessingString']  = 'ProcString'
        dataProcArguments['ConfigCacheID'] = recoConfig
        dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                             "SkimInput": "RECOoutput",
                                             "SkimSplitAlgo": "FileBased",
                                             "SkimSplitArgs": {"files_per_job": 1,
                                                               "include_parents": True},
                                             "ConfigCacheID": skimConfig,
                                             "Scenario": None}]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                         tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                         Merged.mergedLFNBase,
                         '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

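        # The skim task subscribes to the merged RECO fileset produced by the DataProcessingMergeRECOoutput task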
        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

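        # DQM harvesting subscribes to the merged DQM output and uses the Harvest split algorithm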
        dqmWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
        topLevelFileset.loadData()

        dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

        dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #36
    def testPrivateMC(self):
        """
        _testPrivateMC_

        Create a PrivateMC workload and verify it installs into WMBS
        correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "privatemc_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
        defaultArguments["ProcessingVersion"] = 1

        processingFactory = PrivateMCWorkloadFactory()
        testWorkload = processingFactory("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

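        # Install the workload into WMBS and load the top-level PrivateMC workflow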
        testWMBSHelper = WMBSHelper(testWorkload, "PrivateMC", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)
        procWorkflow = Workflow(name = "TestWorkload",
                              task = "/TestWorkload/PrivateMC")
        procWorkflow.load()
        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                                  "Error: Wrong number of WF outputs: %s" % len(procWorkflow.outputMap.keys()))

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]  # PrivateMC, like Analysis, has no merge task, so this points at the unmerged fileset
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

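        # PrivateMC has no merge tasks, so the merged and unmerged fileset entries both point at the unmerged filesets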
        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

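        # The top-level subscription should be of type PrivateMC with EventBased splitting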
        topLevelFileset = Fileset(name = "TestWorkload-PrivateMC-SomeBlock")
        topLevelFileset.loadData()
        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "PrivateMC",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

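        # Verify the LogCollect subscription on the unmerged logArchive fileset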
        procLogCollect = Fileset(name = "/TestWorkload/PrivateMC/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/PrivateMC/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
Example #37
    def testReRecoDroppingRECO(self):
        """
        _testReRecoDroppingRECO_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on. This test runs on unmerged RECO output.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
        dataProcArguments["ProcessingString"] = "ProcString"
        dataProcArguments["ConfigCacheID"] = recoConfig
        dataProcArguments.update({"SkimName1": "SomeSkim",
                                  "SkimInput1": "RECOoutput",
                                  "Skim1ConfigCacheID": skimConfig})
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
        dataProcArguments["EnableHarvesting"] = True
        dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

        factory = ReRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
                         SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                         Merged.mergedLFNBase,
                         '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

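        # Install the workload into WMBS and load the skim workflow attached directly to DataProcessing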
        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

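        # Output map keys are <module><tier>; check the merged and unmerged filesets for each skim output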
        goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

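        # Each skim output module has its own merge workflow with Merged<tier> and logArchive outputs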
        for goldenOutputMod, tier in goldenOutputMods.items():
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (
                             goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

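        # Since RECOoutput is transient, the skim subscribes to the unmerged RECO fileset of DataProcessing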
        topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput, tier in goldenOutputMods.items():
            fset = skimOutput + tier
            unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
            unmerged.loadData()
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

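        # Cleanup subscriptions on the unmerged skim outputs use SiblingProcessingBased splitting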
        for skimOutput, tier in goldenOutputMods.items():
            fset = skimOutput + tier
            unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmerged%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

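        # Each merge task gets a LogCollect subscription on its merged-logArchive fileset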
        for skimOutput in goldenOutputMods:
            skimMergeLogCollect = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (
                                                       skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
Example #38
    def testPromptRecoWithSkims(self):
        """
        _testPromptRecoWithSkims_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["EnableHarvesting"] = True
        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

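        # Install the workload into WMBS; the Reco workflow has one output per write tier plus logArchive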
        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

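        # Check the Reco output filesets; write_ALCARECO is consumed by AlcaSkim, so its merged name is not asserted here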
        goldenOutputMods = {"write_RECO": "RECO", "write_ALCARECO": "ALCARECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

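        # The AlcaSkim workflow has one ALCARECOStream output per configured skim plus a logArchive output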
        alcaSkimWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                         "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            fset = goldenOutputMod + "ALCARECO"
            mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

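        # DQM harvesting runs as a child of the RecoMergewrite_DQM task; check its logArchive filesets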
        dqmWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

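        # Verify the merge workflows for the RECO, AOD and DQM write tiers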
        goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

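        # The top-level Reco subscription is of type Processing with EventAwareLumiBased splitting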
        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

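        # Merge subscriptions read the unmerged data-tier and ALCARECO stream filesets with ParentlessMergeBySize splitting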
        unmergedOutputs = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for unmergedOutput, tier in unmergedOutputs.items():
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

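        # Cleanup subscriptions on the unmerged filesets use SiblingProcessingBased splitting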
        goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

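        # LogCollect subscriptions: Reco, AlcaSkim, the merge tasks and the DQM harvesting task each collect their logArchive files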
        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                   task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                                                       goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=recoMergeLogCollect, workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                                  task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                                                      goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        dqmHarvestLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                                task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #39
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly.
        """
        arguments = getTestArguments()
        arguments["ConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

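        # Load the MonteCarloFromGEN processing workflow and verify its outputs and workflow type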
        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'lheproduction')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

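        # Each output module has a merge workflow with Merged and logArchive outputs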
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

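        # The top-level subscription is of type Production with LumiBased splitting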
        topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Production",
                         "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

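        # Cleanup subscriptions on the unmerged output filesets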
        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

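        # LogCollect subscriptions for the processing task and both merge tasks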
        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return