Ejemplo n.º 1
0
    def testPileupFiles(self):
        """
        _testPileupFiles_

        Test that alll the pileup files end up in the report
        """

        report = Report("cmsRun1")
        report.parse(self.pileupXmlPath)
        self.assertEqual(len(report.getAllInputFiles()), 14)

        primaryCount = 0
        secondaryCount = 0
        mixingCount = 0

        for fileEntry in report.getAllInputFiles():
            if fileEntry['input_type'] == 'mixingFiles':
                mixingCount += 1
            elif fileEntry['input_type'] == 'primaryFiles':
                primaryCount += 1
            elif fileEntry['input_type'] == 'secondaryFiles':
                secondaryCount += 1

        self.assertEqual(primaryCount, 1)
        self.assertEqual(secondaryCount, 0)
        self.assertEqual(mixingCount, 13)
        self.assertEqual(len(report.getAllFallbackFiles()), 1)

        return
Ejemplo n.º 2
0
    def _handleSubmitFailedJobs(self, badJobs, exitCode):
        """
        __handleSubmitFailedJobs_

        For a default job report for the exitCode
        and register in the job. Preserve it on disk as well.
        Propagate the failure to the JobStateMachine.
        """
        fwjrBinds = []
        for job in badJobs:
            job['couch_record'] = None
            job['fwjr'] = Report()
            if exitCode in (71102, 71104):
                job['fwjr'].addError(
                    "JobSubmit", exitCode, "SubmitFailed",
                    WM_JOB_ERROR_CODES[exitCode] +
                    ', '.join(job['possibleLocations']))
            else:
                job['fwjr'].addError("JobSubmit", exitCode, "SubmitFailed",
                                     WM_JOB_ERROR_CODES[exitCode])
            fwjrPath = os.path.join(job['cache_dir'],
                                    'Report.%d.pkl' % int(job['retry_count']))
            job['fwjr'].setJobID(job['id'])
            try:
                job['fwjr'].save(fwjrPath)
                fwjrBinds.append({"jobid": job["id"], "fwjrpath": fwjrPath})
            except IOError as ioer:
                logging.error(
                    "Failed to write FWJR for submit failed job %d, message: %s",
                    job['id'], str(ioer))
        self.changeState.propagate(badJobs, "submitfailed", "created")
        self.setFWJRPathAction.execute(binds=fwjrBinds)
        return
Ejemplo n.º 3
0
    def test_PerformanceReport(self):
        """
        _PerformanceReport_

        Test the performance report part of the job report
        """

        report = Report("cmsRun1")
        report.setStepVSize(stepName="cmsRun1",
                            minimum=100,
                            maximum=800,
                            average=244)
        report.setStepRSS(stepName="cmsRun1",
                          minimum=100,
                          maximum=800,
                          average=244)
        report.setStepPCPU(stepName="cmsRun1",
                           minimum=100,
                           maximum=800,
                           average=244)
        report.setStepPMEM(stepName="cmsRun1",
                           minimum=100,
                           maximum=800,
                           average=244)

        perf = report.retrieveStep("cmsRun1").performance
        for section in perf.dictionary_().values():
            d = section.dictionary_()
            self.assertEqual(d['min'], 100)
            self.assertEqual(d['max'], 800)
            self.assertEqual(d['average'], 244)
        return
Ejemplo n.º 4
0
    def testB_ExecuteNonZeroExit(self):
        """
        _ExecuteNonZeroExit_

        Test the execution of a script
        which exits with non-zero code.
        """
        self.step.application.command.executable = "brokenCmsRun.py"
        shutil.copy(os.path.join(getTestBase(),
                                 "WMCore_t/FwkJobReport_t/CMSSWFailReport.xml"),
                    os.path.join(self.step.builder.workingDir, "FrameworkJobReport.xml"))
        try:
            os.chdir(self.step.builder.workingDir)
            executor = StepFactory.getStepExecutor("CMSSW")
            executor.initialise(self.step, self.job)
            executor.pre()
            executor.step.runtime.scramPreScripts.remove("SetupCMSSWPset")
            try:
                executor.execute()
                self.fail("An exception should have been raised")
            except WMExecutionFailure, ex:
                executor.diagnostic(ex.code, executor, ExceptionInstance = ex)
                self.assertEqual(8001, executor.report.getExitCode())
                report = Report()
                report.load("Report.pkl")
                self.assertEqual(8001, report.getExitCode())
        except Exception, ex:
            self.fail("Failure encountered, %s" % str(ex))
Ejemplo n.º 5
0
    def testC_ExecuteSegfault(self):
        """
        _ExecuteSegfault_

        Test the execution of a script
        which raises a ABRT signal which
        is the normal CMSSW response
        to a SEGFAULT.
        """
        self.step.application.command.executable = "test.sh"
        # CMSSW leaves an empty FWJR when a SEGFAULT is present
        open(os.path.join(self.step.builder.workingDir, "FrameworkJobReport.xml"), "w").close()
        try:
            os.chdir(self.step.builder.workingDir)
            executor = StepFactory.getStepExecutor("CMSSW")
            executor.initialise(self.step, self.job)
            executor.pre()
            executor.step.runtime.scramPreScripts.remove("SetupCMSSWPset")
            try:
                executor.execute()
                self.fail("An exception should have been raised")
            except WMExecutionFailure, ex:
                executor.diagnostic(ex.code, executor, ExceptionInstance = ex)
                self.assertEqual(134, executor.report.getExitCode())
                report = Report()
                report.load("Report.pkl")
                self.assertEqual(134, report.getExitCode())
        except Exception, ex:
            self.fail("Failure encountered, %s" % str(ex))
Ejemplo n.º 6
0
    def testCPBackendStageOutAgainstReportNew(self):
        myReport = Report()
        reportPath = os.path.join(self.testDir, 'UnitTests', 'WMTaskSpace',
                                  'cmsRun1', 'Report.pkl')
        myReport.unpersist(reportPath)
        myReport.data.cmsRun1.status = 0
        myReport.persist(reportPath)
        executor = StageOutExecutor.StageOut()
        executor.initialise(self.stepdata, self.job)
        self.setLocalOverride(self.stepdata)
        self.stepdata.override.newStageOut = True
        executor.step = self.stepdata
        # It should fail with:
        # AssertionError: LFN candidate: hosts doesn't match any of the following regular expressions:
        with self.assertRaises(AssertionError):
            executor.execute()

        # now fix those output file names to pass the Lexicon check, and execute it again
        myReport.unpersist(reportPath)
        # cmsRun1.output.FEVT.files.file0.lfn = 'hosts'
        # cmsRun1.output.ALCARECOStreamCombined.files.file0.lfn = '/test1/hosts'
        myReport.data.cmsRun1.output.FEVT.files.file0.lfn = "/store/mc/acqera/pd/FEVT/procstr/abc123.root"
        myReport.data.cmsRun1.output.ALCARECOStreamCombined.files.file0.lfn = "/store/mc/acqera/pd/ALCARECO/procstr/abc123.root"
        myReport.persist(reportPath)
        executor.execute()

        self.assertTrue(
            os.path.exists(
                os.path.join(self.testDir, "store", "mc", "acqera", "pd",
                             "FEVT")))
        self.assertTrue(
            os.path.exists(
                os.path.join(self.testDir, "store", "mc", "acqera", "pd",
                             "ALCARECO")))
Ejemplo n.º 7
0
    def testAbortedState(self):
        """
        _testAbortedState_

        Check that we can kill jobs when a site is set to aborted
        ### We no longer need this test as we are not killing jobs that are running
        """
        self.tempDir = self.testInit.generateWorkDir()
        config = self.createConfig()
        myResourceControl = ResourceControl(config)
        myResourceControl.insertSite("testSite1", 10, 20, "testSE1", "testCE1", "T1_US_FNAL", "MockPlugin")
        myResourceControl.insertSite("testSite2", 20, 40, "testSE2", "testCE2", "T1_IT_CNAF", "MockPlugin")

        myResourceControl.insertThreshold("testSite1", "Processing", 20, 10)
        myResourceControl.insertThreshold("testSite1", "Merge", 200, 100)
        myResourceControl.insertThreshold("testSite2", "Processing", 50, 25)
        myResourceControl.insertThreshold("testSite2", "Merge", 135, 65)

        self.createJobs()

        myResourceControl.changeSiteState("testSite1", "Aborted")

        ## Now check the tempDir for a FWJR for the killed job
        reportPath = os.path.join(self.tempDir, "Report.0.pkl")
        report = Report()
        report.load(reportPath)
        self.assertEqual(report.getExitCode(), 71301)
        return
Ejemplo n.º 8
0
    def testUnitTestBackendNew(self):
        myReport = Report()
        myReport.unpersist(
            os.path.join(self.testDir, 'UnitTests', 'WMTaskSpace', 'cmsRun1',
                         'Report.pkl'))
        myReport.data.cmsRun1.status = 1
        myReport.persist(
            os.path.join(self.testDir, 'UnitTests', 'WMTaskSpace', 'cmsRun1',
                         'Report.pkl'))

        executor = StageOutExecutor.StageOut()
        helper = StageOutTemplate.StageOutStepHelper(self.stepdata)
        helper.addOverride(override='command', overrideValue='test-win')
        helper.addOverride(override='option', overrideValue='')
        helper.addOverride(override='phedex-node',
                           overrideValue='charlie.sheen.biz')
        helper.addOverride(override='lfn-prefix', overrideValue='test-win')
        helper.setNewStageoutOverride(True)

        executor.initialise(self.stepdata, self.job)
        self.setLocalOverride(self.stepdata)
        executor.step = self.stepdata
        executor.execute()
        self.assertFalse(os.path.exists(os.path.join(self.testDir, 'hosts')))
        self.assertFalse(
            os.path.exists(os.path.join(self.testDir, 'test1', 'hosts')))
Ejemplo n.º 9
0
    def testUnitTestBackend(self):
        myReport = Report()
        myReport.unpersist(
            os.path.join(self.testDir, 'UnitTests', 'WMTaskSpace', 'cmsRun1',
                         'Report.pkl'))
        myReport.data.cmsRun1.status = 1
        myReport.persist(
            os.path.join(self.testDir, 'UnitTests', 'WMTaskSpace', 'cmsRun1',
                         'Report.pkl'))

        executor = LogArchiveExecutor.LogArchive()
        helper = LogArchiveTemplate.LogArchiveStepHelper(self.stepdata)
        helper.addOverride(override='command', overrideValue='test-win')
        helper.addOverride(override='option', overrideValue='')
        helper.addOverride(override='se-name',
                           overrideValue='charlie.sheen.biz')
        helper.addOverride(override='lfn-prefix', overrideValue='test-win')

        executor.initialise(self.stepdata, self.job)
        self.setLocalOverride(self.stepdata)
        executor.step = self.stepdata
        executor.execute()
        self.assertFalse(os.path.exists(os.path.join(self.testDir, 'hosts')))
        self.assertFalse(
            os.path.exists(os.path.join(self.testDir, 'test1', 'hosts')))
Ejemplo n.º 10
0
    def testStripReport(self):
        """
        _testStripReport_

        Test whether or not we can strip input file information
        from a FWJR and create a smaller object.
        """

        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        path1 = os.path.join(self.testDir, 'testReport1.pkl')
        path2 = os.path.join(self.testDir, 'testReport2.pkl')

        myReport.save(path1)
        info = FileTools.getFileInfo(filename=path1)
        sizeBefore = info['Size']

        inputFiles = myReport.getAllInputFiles()
        self.assertEqual(len(inputFiles), 1)
        myReport.stripInputFiles()
        self.assertEqual(len(myReport.getAllInputFiles()), 0)

        myReport.save(path2)
        info = FileTools.getFileInfo(filename=path2)
        sizeAfter = info['Size']

        self.assertGreater(sizeBefore, sizeAfter)

        return
Ejemplo n.º 11
0
    def testOutputFiles(self):
        """
        _testOutputFiles_

        Test some basic manipulation of output files
        """

        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        files = myReport.getAllFilesFromStep(step="cmsRun1")

        f1 = files[0]
        f2 = files[1]

        self.assertEqual(f1['outputModule'], 'outputRECORECO')
        self.assertEqual(f1['pfn'], 'outputRECORECO.root')

        self.assertEqual(f2['outputModule'], 'outputALCARECORECO')
        self.assertEqual(f2['pfn'], 'outputALCARECORECO.root')

        for f in files:
            self.assertEqual(f['events'], 2)
            self.assertEqual(f['configURL'], None)
            self.assertEqual(f['merged'], False)
            self.assertEqual(f['validStatus'], None)
            self.assertEqual(f['first_event'], 0)

        return
Ejemplo n.º 12
0
    def testDeleteOutputModule(self):
        """
        _testDeleteOutputModule_

        If asked delete an output module, if it doesn't
        exist then do nothing
        """
        originalReport = Report("cmsRun1")
        originalReport.parse(self.xmlPath)

        self.assertTrue(
            originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
            "Error: Report XML doesn't have the module for the test, invalid test"
        )

        originalOutputModules = len(
            originalReport.retrieveStep("cmsRun1").outputModules)
        originalReport.deleteOutputModuleForStep("cmsRun1",
                                                 "outputALCARECORECO")
        self.assertFalse(
            originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
            "Error: The output module persists after deletion")
        self.assertEqual(
            len(originalReport.retrieveStep("cmsRun1").outputModules),
            originalOutputModules - 1,
            "Error: The number of output modules is incorrect after deletion")
Ejemplo n.º 13
0
    def testJSONEncoding(self):
        """
        _testJSONEncoding_

        Verify that turning the FWJR into a JSON object works correctly.
        """
        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        jsonReport = myReport.__to_json__(None)

        assert "task" in jsonReport.keys(), \
            "Error: Task name missing from report."

        assert len(jsonReport["steps"].keys()) == 1, \
            "Error: Wrong number of steps in report."
        assert "cmsRun1" in jsonReport["steps"].keys(), \
            "Error: Step missing from json report."

        cmsRunStep = jsonReport["steps"]["cmsRun1"]

        jsonReportSections = [
            "status", "errors", "logs", "parameters", "site", "analysis",
            "cleanup", "input", "output", "start"
        ]
        for jsonReportSection in jsonReportSections:
            assert jsonReportSection in cmsRunStep.keys(), \
                "Error: missing section: %s" % jsonReportSection

        return
Ejemplo n.º 14
0
    def testPerformanceSummary(self):
        """
        _testPerformanceSummary_

        Test whether or not we can pull performance information
        out of a Timing/SimpleMemoryCheck jobReport
        """

        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        # Do a brief check of the three sections
        perf = myReport.data.cmsRun1.performance

        self.assertEqual(perf.memory.PeakValueRss, '492.293')
        self.assertEqual(perf.cpu.TotalJobCPU, '9.16361')
        self.assertEqual(perf.storage.writeTotalMB, 5.22226)
        self.assertAlmostEqual(perf.storage.writeTotalSecs, 0,
                               places=0)  # actual value is 0.06
        self.assertEqual(perf.storage.readPercentageOps, 0.98585512216030857)

        return
Ejemplo n.º 15
0
    def testMultiCoreReport(self):
        """
        _testMultiCoreReport_

        Verify that multicore reports can be json encoded and uploaded to couch.
        """
        couchdb = CouchServer(os.environ["COUCHURL"])
        fwjrdatabase = couchdb.connectDatabase("report_t/fwjrs")

        self.mcPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/MulticoreReport.pkl")
        myReport = Report()
        myReport.unpersist(self.mcPath)

        fwjrDocument = {
            "_id": "303-0",
            "jobid": 303,
            "retrycount": 0,
            "fwjr": myReport.__to_json__(None),
            "type": "fwjr"
        }

        fwjrdatabase.queue(fwjrDocument, timestamp=True)
        fwjrdatabase.commit()
        return
Ejemplo n.º 16
0
    def testPerformanceJSON(self):
        """
        _testPerformanceJSON_

        Verify that the performance section of the report is correctly converted
        to JSON.
        """
        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        perfSection = myReport.__to_json__(
            thunker=None)["steps"]["cmsRun1"]["performance"]

        self.assertTrue("storage" in perfSection,
                        "Error: Storage section is missing.")
        self.assertTrue("memory" in perfSection,
                        "Error: Memory section is missing.")
        self.assertTrue("cpu" in perfSection, "Error: CPU section is missing.")

        self.assertEqual(perfSection["cpu"]["AvgEventCPU"], "0.626105",
                         "Error: AvgEventCPU is wrong.")
        self.assertEqual(perfSection["cpu"]["TotalJobTime"], "23.5703",
                         "Error: TotalJobTime is wrong.")
        self.assertEqual(perfSection["storage"]["readTotalMB"], 39.6166,
                         "Error: readTotalMB is wrong.")
        self.assertEqual(perfSection["storage"]["readMaxMSec"], 320.653,
                         "Error: readMaxMSec is wrong")
        self.assertEqual(perfSection["memory"]["PeakValueRss"], "492.293",
                         "Error: PeakValueRss is wrong.")
        self.assertEqual(perfSection["memory"]["PeakValueVsize"], "643.281",
                         "Error: PeakValueVsize is wrong.")
        return
Ejemplo n.º 17
0
    def generateCreateFailedReports(self, createFailedJobs):
        """
        _generateCreateFailedReports_

        Create and store FWJR for the  jobs that failed on creation
        leaving meaningful information about what happened with them
        """
        if not createFailedJobs:
            return

        fjrsToSave = []
        for failedJob in createFailedJobs:
            report = Report()
            defaultMsg = "There is a condition which assures that this job will fail if it's submitted"
            report.addError("CreationFailure", 99305, "CreationFailure", failedJob.get("failedReason", defaultMsg))
            jobCache = failedJob.getCache()
            try:
                fjrPath = os.path.join(jobCache, "Report.0.pkl")
                report.save(fjrPath)
                fjrsToSave.append({"jobid": failedJob["id"], "fwjrpath": fjrPath})
                failedJob["fwjr"] = report
            except Exception:
                logging.error("Something went wrong while saving the report for  job %s" % failedJob["id"])

        myThread = threading.currentThread()
        self.setFWJRPath.execute(binds = fjrsToSave, conn = myThread.transaction.conn, transaction = True)

        return
Ejemplo n.º 18
0
    def setUp(self):

        self.directory = os.path.dirname(
            os.path.realpath(inspect.getsourcefile(findThisModule)))
        self.multiReport = os.path.join(self.directory,
                                        "CMSSWMulticoreTopReport.xml")
        self.report = Report("MultiReport_t")
Ejemplo n.º 19
0
    def testProperties(self):
        """
        _testProperties_

        Test data fields for the properties information for DBS
        """

        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        name = "ThisIsASillyString"

        myReport.setValidStatus(name)
        myReport.setGlobalTag(name)
        myReport.setAcquisitionProcessing(acquisitionEra='NULL',
                                          processingVer=name)
        myReport.setInputDataset(inputPath='/lame/path')

        for f in myReport.getAllFilesFromStep("cmsRun1"):
            self.assertEqual(f['globalTag'], name)
            self.assertEqual(f['validStatus'], name)
            self.assertEqual(f['processingVer'], name)
            self.assertEqual(f['acquisitionEra'], 'NULL')
            self.assertEqual(f['inputPath'], '/lame/path')

        return
Ejemplo n.º 20
0
    def execute(self, emulator=None):
        """
        _execute_

        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        logging.info("Steps.Executors.%s.execute called",
                     self.__class__.__name__)

        if self.step.upload.proxy:
            try:
                self.stepSpace.getFromSandbox(self.step.upload.proxy)
            except Exception as ex:
                # Let it go, it wasn't in the sandbox. Then it must be
                # somewhere else
                del ex

        # Search through steps for analysis files
        for step in self.stepSpace.taskSpace.stepSpaces():
            if step == self.stepName:
                # Don't try to parse your own report; it's not there yet
                continue
            stepLocation = os.path.join(self.stepSpace.taskSpace.location,
                                        step)
            logging.info("Beginning report processing for step %s", step)
            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s",
                              step, stepLocation)
                continue

            # First, get everything from a file and 'unpersist' it
            stepReport = Report()
            stepReport.unpersist(reportLocation, step)

            # Don't upload nor stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            # Pulling out the analysis files from each step
            analysisFiles = stepReport.getAnalysisFilesFromStep(step)

            # Working on analysis files
            for analysisFile in analysisFiles:
                # only deal with DQM files
                if analysisFile.FileClass == "DQM":
                    # uploading file to the server
                    self.httpPost(
                        os.path.join(stepLocation,
                                     os.path.basename(analysisFile.fileName)))

            # Am DONE with report
            # Persist it
            stepReport.persist(reportLocation)

        return
Ejemplo n.º 21
0
    def testMultipleInputs(self):
        """
        _testMultipleInputs_

        Verify that parsing XML reports with multiple inputs works correctly.
        """
        xmlPath = os.path.join(
            WMCore.WMBase.getTestBase(),
            "WMCore_t/FwkJobReport_t/CMSSWMultipleInput.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        assert hasattr(myReport.data.cmsRun1.input, "source"), \
               "Error: Report missing input source."

        inputFiles = myReport.getInputFilesFromStep("cmsRun1")

        assert len(inputFiles) == 2, \
               "Error: Wrong number of input files."

        for inputFile in inputFiles:
            assert inputFile["input_type"] == "primaryFiles", \
                   "Error: Wrong input type."
            assert inputFile["module_label"] == "source", \
                   "Error: Module label is wrong"
            assert inputFile["catalog"] == "trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap", \
                   "Error: Catalog is wrong."
            assert inputFile["events"] == 2, \
                   "Error: Wrong number of events."
            assert inputFile["input_source_class"] == "PoolSource", \
                   "Error: Wrong input source class."

            if inputFile["guid"] == "F0875ECD-3347-DF11-9FE0-003048678A80":
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                       "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                       "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                       "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                       "Error: Wrong run number."
                assert 1 in list(inputFile["runs"])[0], \
                       "Error: Wrong lumi sections in input file."
            else:
                assert inputFile["guid"] == "626D74CE-3347-DF11-9363-0030486790C0", \
                       "Error: Wrong guid."
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                       "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                       "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                       "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                       "Error: Wrong run number."
                assert 2 in list(inputFile["runs"])[0], \
                       "Error: Wrong lumi sections in input file."

        return
Ejemplo n.º 22
0
 def makeReport(self, fileName):
     myReport = Report('oneitem')
     myReport.addStep('stageOut1')
     myReport.addOutputModule('module1')
     myReport.addOutputModule('module2')
     myReport.addOutputFile('module1', {'lfn': 'FILE1', 'size': 1, 'events': 1})
     myReport.addOutputFile('module2', {'lfn': 'FILE2', 'size': 1, 'events': 1})
     myReport.addOutputFile('module2', {'lfn': 'FILE3', 'size': 1, 'events': 1})
     myReport.persist(fileName)
Ejemplo n.º 23
0
    def testB_EmulatorTest(self):
        """
        _EmulatorTest_

        This is where things get scary.  We need to not only unpack the job,
        but also ascertain whether it can run locally in emulator mode.

        This requires...uh...emulator emulation.
        """


        # Assume all this works, because we tested it in testA
        workloadName = 'basicWorkload'
        workload     = self.createTestWorkload(workloadName = workloadName)

        self.createWMBSComponents(workload = workload)

        self.unpackComponents(workload = workload)


        self.runJobs(workload = workload)

        # Check the report
        taskDir = os.path.join(self.testDir, 'unpack/ReReco/job/WMTaskSpace')
        report = Report()
        report.load(os.path.join(taskDir, 'Report.0.pkl'))
        cmsReport = report.data.cmsRun1



        # Now validate the report
        self.assertEqual(report.data.ceName, socket.gethostname())
        self.assertEqual(report.data.seName, 'cmssrm.fnal.gov')
        self.assertEqual(report.data.siteName, 'T1_US_FNAL')
        self.assertEqual(report.data.hostName, socket.gethostname())
        self.assertTrue(report.data.completed)

        # Should have status 0 (emulator job)
        self.assertEqual(cmsReport.status, 0)

        # Should have one output module
        self.assertEqual(cmsReport.outputModules, ['TestOutputModule'])

        # It should have one file for input and output
        self.assertEqual(cmsReport.input.PoolSource.files.fileCount, 1)
        self.assertEqual(cmsReport.output.TestOutputModule.files.fileCount, 1)

        # So, um, I guess we're done


        # At the end, copy the directory
        #if os.path.exists('tmpDir'):
        #    shutil.rmtree('tmpDir')
        #shutil.copytree(self.testDir, 'tmpDir')

        return
Ejemplo n.º 24
0
    def testFallbackFiles(self):
        """
        _testFallback_

        Test that fallback files end up in the report
        """

        # For negative control, check a good report with no fallback files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        self.assertEqual(goodReport.getAllFallbackFiles(), [])

        # Check a report where the file was a fallback
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        self.assertEqual(sorted(badReport.getAllFallbackFiles()),
                         ['/store/data/Run2012D/SingleElectron/AOD/PromptReco-v1/000/207/279/D43A5B72-1831-E211-895D-001D09F24763.root'])

        return
Ejemplo n.º 25
0
    def testFallbackFilesJSON(self):
        """
        _testFallbackFilesJSON_

        Test that fallback attempt files are translated properly into JSON
        """

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['fallbackFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['fallbackFiles']), 1)

        return
Ejemplo n.º 26
0
    def createMissingFWKJR(self, errorCode=999, errorDescription='Failure of unknown type'):
        """
        _createMissingFWJR_

        Create a missing FWJR if the report can't be found by the code in the
        path location.
        """
        report = Report()
        report.addError("cmsRun1", errorCode, "MissingJobReport", errorDescription)
        report.data.cmsRun1.status = "Failed"
        return report
Ejemplo n.º 27
0
    def mergeReport(self):
        """
        _mergeReport_

        read the merge report
        """
        reportInstance = Report(self.stepName)
        ReportReader.xmlToJobReport(
            reportInstance,
            os.path.join(self.workingDir, self.merge_report_file))
        return reportInstance
Ejemplo n.º 28
0
    def _handleSubmitFailedJobs(self, badJobs, exitCode):
        """
        __handleSubmitFailedJobs_

        For a default job report for the exitCode
        and register in the job. Preserve it on disk as well.
        Propagate the failure to the JobStateMachine.
        """
        fwjrBinds = []
        for job in badJobs:
            job['couch_record'] = None
            job['fwjr'] = Report()
            if exitCode in [71102, 71104]:
                job['fwjr'].addError(
                    "JobSubmit", exitCode, "SubmitFailed",
                    WM_JOB_ERROR_CODES[exitCode] +
                    ', '.join(job['possibleLocations']))
            elif exitCode in [71101]:
                # there is no possible site
                if "fileLocations" in job:
                    job['fwjr'].addError(
                        "JobSubmit", exitCode, "SubmitFailed",
                        WM_JOB_ERROR_CODES[exitCode] + ": file locations: " +
                        ', '.join(job['fileLocations']) +
                        ": site white list: " +
                        ', '.join(job['siteWhitelist']) +
                        ": site black list: " +
                        ', '.join(job['siteBlacklist']))
                else:
                    # This is temporary addition if this is patched for existing agent.
                    # If jobs are created before the patch is applied fileLocations is not set.
                    # TODO. remove this later for new agent
                    job['fwjr'].addError(
                        "JobSubmit", exitCode, "SubmitFailed",
                        WM_JOB_ERROR_CODES[exitCode] +
                        ": Job is created before this patch. Please check this input for the jobs: %s "
                        % job['fwjr'].getAllInputFiles())

            else:
                job['fwjr'].addError("JobSubmit", exitCode, "SubmitFailed",
                                     WM_JOB_ERROR_CODES[exitCode])
            fwjrPath = os.path.join(job['cache_dir'],
                                    'Report.%d.pkl' % int(job['retry_count']))
            job['fwjr'].setJobID(job['id'])
            try:
                job['fwjr'].save(fwjrPath)
                fwjrBinds.append({"jobid": job["id"], "fwjrpath": fwjrPath})
            except IOError as ioer:
                logging.error(
                    "Failed to write FWJR for submit failed job %d, message: %s",
                    job['id'], str(ioer))
        self.changeState.propagate(badJobs, "submitfailed", "created")
        self.setFWJRPathAction.execute(binds=fwjrBinds)
        return
Ejemplo n.º 29
0
    def testOutputCheck(self):
        """
        _testOutputCheck_

        Check that we can identify bad reports with no output files
        """
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        badReport.checkForOutputFiles("cmsRun1")
        self.assertFalse(badReport.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(badReport.getExitCode(), 60450)
        return
Ejemplo n.º 30
0
    def submit(self, jobs, info=None):
        """
        _submit_

        Submits jobs to the condor queue
        """
        successfulJobs = []
        failedJobs = []

        if len(jobs) == 0:
            # Then was have nothing to do
            return successfulJobs, failedJobs

        schedd = htcondor.Schedd()

        # Submit the jobs
        for jobsReady in grouper(jobs, self.jobsPerSubmit):

            clusterAd = self.getClusterAd()
            procAds = self.getProcAds(jobsReady)

            logging.debug(
                "Start: Submitting %d jobs using Condor Python SubmitMany",
                len(procAds))
            try:
                # 4th argument has to be None otherwise HTCondor leaks the result ads
                # through it (as of 8.7.x). More info in WMCore/#8729
                clusterId = schedd.submitMany(clusterAd, procAds, False, None)
            except Exception as ex:
                logging.error("SimpleCondorPlugin job submission failed.")
                logging.exception(str(ex))
                logging.error(
                    "Moving on the the next batch of jobs and/or cycle....")

                condorErrorReport = Report()
                condorErrorReport.addError("JobSubmit", 61202, "CondorError",
                                           str(ex))
                for job in jobsReady:
                    job['fwjr'] = condorErrorReport
                    failedJobs.append(job)
            else:
                logging.debug(
                    "Job submission to condor succeeded, clusterId is %s",
                    clusterId)
                for index, job in enumerate(jobsReady):
                    job['gridid'] = "%s.%s" % (clusterId, index)
                    job['status'] = 'Idle'
                    successfulJobs.append(job)

        # We must return a list of jobs successfully submitted and a list of jobs failed
        logging.info(
            "Done submitting jobs for this cycle in SimpleCondorPlugin")
        return successfulJobs, failedJobs