Example #1
    def testSkippedFilesJSON(self):
        """
        _testSkippedFilesJSON_

        Test that skipped files are translated properly into JSON
        """
        # Check a report where some files were skipped but not all
        originalReport = Report("cmsRun1")
        originalReport.parse(self.skippedFilesxmlPath)
        originalJSON = originalReport.__to_json__(None)
        self.assertEqual(len(originalJSON['skippedFiles']), 1)

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['skippedFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['skippedFiles']), 2)

        return
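The fixture paths referenced above (self.xmlPath, self.skippedFilesxmlPath, self.skippedAllFilesxmlPath, and self.fallbackXmlPath in the later fallback tests) come from the test case's setUp, which is not shown. A minimal sketch, assuming the WMCore_t/FwkJobReport_t layout used in the later examples; only CMSSWProcessingReport.xml is confirmed by Example #4, the other file names are placeholders:

    def setUp(self):
        # Hypothetical fixture wiring; swap in the real report file names.
        from WMCore.WMBase import getTestBase
        testBase = getTestBase()
        self.xmlPath = os.path.join(testBase, "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")
        self.skippedFilesxmlPath = os.path.join(testBase, "WMCore_t/FwkJobReport_t/SkippedFilesReport.xml")        # assumed name
        self.skippedAllFilesxmlPath = os.path.join(testBase, "WMCore_t/FwkJobReport_t/SkippedAllFilesReport.xml")  # assumed name
        self.fallbackXmlPath = os.path.join(testBase, "WMCore_t/FwkJobReport_t/FallbackReport.xml")                # assumed name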
Example #2
    def testSkippedFilesJSON(self):
        """
        _testSkippedFilesJSON_

        Test that skipped files are translated properly into JSON
        """
        # Check a report where some files were skipped but not all
        originalReport = Report("cmsRun1")
        originalReport.parse(self.skippedFilesxmlPath)
        originalJSON = originalReport.__to_json__(None)
        self.assertEqual(len(originalJSON['skippedFiles']), 1)

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['skippedFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['skippedFiles']), 2)

        return
Example #3
    def testMultiCoreReport(self):
        """
        _testMultiCoreReport_

        Verify that multicore reports can be json encoded and uploaded to couch.
        """
        couchdb = CouchServer(os.environ["COUCHURL"])
        fwjrdatabase = couchdb.connectDatabase("report_t/fwjrs")

        self.mcPath = os.path.join(
            WMCore.WMBase.getTestBase(),
            "WMCore_t/FwkJobReport_t/MulticoreReport.pkl")
        myReport = Report()
        myReport.unpersist(self.mcPath)

        fwjrDocument = {
            "_id": "303-0",
            "jobid": 303,
            "retrycount": 0,
            "fwjr": myReport.__to_json__(None),
            "type": "fwjr"
        }

        fwjrdatabase.queue(fwjrDocument, timestamp=True)
        fwjrdatabase.commit()
        return
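This test talks to a live CouchDB; it assumes the COUCHURL environment variable points at a reachable server. A minimal sketch of supplying a fallback value before the test runs (the URL is a placeholder for a local instance):

import os

# Hypothetical local endpoint; replace with your agent's CouchDB URL and credentials.
os.environ.setdefault("COUCHURL", "http://localhost:5984")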
Example #4
    def testJSONEncoding(self):
        """
        _testJSONEncoding_

        Verify that turning the FWJR into a JSON object works correctly.
        """
        xmlPath = os.path.join(
            WMCore.WMBase.getTestBase(),
            "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        jsonReport = myReport.__to_json__(None)

        assert "task" in jsonReport.keys(), \
               "Error: Task name missing from report."

        assert len(jsonReport["steps"].keys()) == 1, \
               "Error: Wrong number of steps in report."
        assert "cmsRun1" in jsonReport["steps"].keys(), \
               "Error: Step missing from json report."

        cmsRunStep = jsonReport["steps"]["cmsRun1"]

        jsonReportSections = [
            "status", "errors", "logs", "parameters", "site", "analysis",
            "cleanup", "input", "output", "start"
        ]
        for jsonReportSection in jsonReportSections:
            assert jsonReportSection in cmsRunStep.keys(), \
                "Error: missing section: %s" % jsonReportSection

        return
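Because __to_json__(None) returns a plain dictionary (the wrapper examples further down json.dump it directly), the encoded report can be handed straight to the standard json module; a minimal sketch, with an arbitrary output file name:

import json

with open("fwjr.json", "w") as handle:
    json.dump(jsonReport, handle, indent=2)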
Example #5
    def testPerformanceJSON(self):
        """
        _testPerformanceJSON_

        Verify that the performance section of the report is correctly converted
        to JSON.
        """
        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        perfSection = myReport.__to_json__(thunker=None)["steps"]["cmsRun1"]["performance"]

        self.assertTrue("storage" in perfSection,
                        "Error: Storage section is missing.")
        self.assertTrue("memory" in perfSection,
                        "Error: Memory section is missing.")
        self.assertTrue("cpu" in perfSection,
                        "Error: CPU section is missing.")

        self.assertEqual(perfSection["cpu"]["AvgEventCPU"], "0.626105",
                         "Error: AvgEventCPU is wrong.")
        self.assertEqual(perfSection["cpu"]["TotalJobTime"], "23.5703",
                         "Error: TotalJobTime is wrong.")
        self.assertEqual(perfSection["storage"]["readTotalMB"], 39.6166,
                         "Error: readTotalMB is wrong.")
        self.assertEqual(perfSection["storage"]["readMaxMSec"], 320.653,
                         "Error: readMaxMSec is wrong")
        self.assertEqual(perfSection["memory"]["PeakValueRss"], "492.293",
                         "Error: PeakValueRss is wrong.")
        self.assertEqual(perfSection["memory"]["PeakValueVsize"], "643.281",
                         "Error: PeakValueVsize is wrong.")
        return
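Note from the assertions that the cpu and memory metrics are serialized as strings while the storage metrics are floats; a short normalisation sketch if numeric values are needed downstream:

avg_event_cpu = float(perfSection["cpu"]["AvgEventCPU"])   # "0.626105" -> 0.626105
peak_rss = float(perfSection["memory"]["PeakValueRss"])    # "492.293"  -> 492.293
read_total_mb = perfSection["storage"]["readTotalMB"]      # already a float (39.6166)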
Example #6
    def testJSONEncoding(self):
        """
        _testJSONEncoding_

        Verify that turning the FWJR into a JSON object works correctly.
        """
        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        jsonReport = myReport.__to_json__(None)

        assert "task" in jsonReport.keys(), \
               "Error: Task name missing from report."

        assert len(jsonReport["steps"].keys()) == 1, \
               "Error: Wrong number of steps in report."
        assert "cmsRun1" in jsonReport["steps"].keys(), \
               "Error: Step missing from json report."

        cmsRunStep = jsonReport["steps"]["cmsRun1"]

        jsonReportSections = ["status", "errors", "logs", "parameters", "site",
                              "analysis", "cleanup", "input", "output", "start"]
        for jsonReportSection in jsonReportSections:
            assert jsonReportSection in cmsRunStep.keys(), \
                "Error: missing section: %s" % jsonReportSection

        return
Example #7
    def testPerformanceJSON(self):
        """
        _testPerformanceJSON_

        Verify that the performance section of the report is correctly converted
        to JSON.
        """
        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        perfSection = myReport.__to_json__(
            thunker=None)["steps"]["cmsRun1"]["performance"]

        self.assertTrue("storage" in perfSection,
                        "Error: Storage section is missing.")
        self.assertTrue("memory" in perfSection,
                        "Error: Memory section is missing.")
        self.assertTrue("cpu" in perfSection, "Error: CPU section is missing.")

        self.assertEqual(perfSection["cpu"]["AvgEventCPU"], "0.626105",
                         "Error: AvgEventCPU is wrong.")
        self.assertEqual(perfSection["cpu"]["TotalJobTime"], "23.5703",
                         "Error: TotalJobTime is wrong.")
        self.assertEqual(perfSection["storage"]["readTotalMB"], 39.6166,
                         "Error: readTotalMB is wrong.")
        self.assertEqual(perfSection["storage"]["readMaxMSec"], 320.653,
                         "Error: readMaxMSec is wrong")
        self.assertEqual(perfSection["memory"]["PeakValueRss"], "492.293",
                         "Error: PeakValueRss is wrong.")
        self.assertEqual(perfSection["memory"]["PeakValueVsize"], "643.281",
                         "Error: PeakValueVsize is wrong.")
        return
Example #8
    def testFallbackFilesJSON(self):
        """
        _testFallbackFilesJSON_

        Test that fallback attempt files are translated properly into JSON
        """

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['fallbackFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['fallbackFiles']), 1)

        return
Example #9
    def testFallbackFilesJSON(self):
        """
        _testFallbackFilesJSON_

        Test that fallback attempt files are translated properly into JSON
        """

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['fallbackFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['fallbackFiles']), 1)

        return
Example #10
    def testMultiCoreReport(self):
        """
        _testMultiCoreReport_

        Verify that multicore reports can be json encoded and uploaded to couch.
        """
        couchdb = CouchServer(os.environ["COUCHURL"])
        fwjrdatabase = couchdb.connectDatabase("report_t/fwjrs")

        self.mcPath = os.path.join(getTestBase(),
                                   "WMCore_t/FwkJobReport_t/MulticoreReport.pkl")
        myReport = Report()
        myReport.unpersist(self.mcPath)

        fwjrDocument = {"_id": "303-0",
                        "jobid": 303,
                        "retrycount": 0,
                        "fwjr": myReport.__to_json__(None),
                        "type": "fwjr"}

        fwjrdatabase.queue(fwjrDocument, timestamp=True)
        fwjrdatabase.commit()
        return
Example #11
        print("==== CMSSW Stack Execution FINISHING at %s ====" % time.asctime(time.gmtime()))
        logCMSSW()
    except WMExecutionFailure as WMex:
        print("ERROR: Caught WMExecutionFailure - code = %s - name = %s - detail = %s" % (WMex.code, WMex.name, WMex.detail))
        exmsg = WMex.name

        # Try to recover what we can from the FJR.  handleException will use this if possible.
        if os.path.exists('FrameworkJobReport.xml'):
            try:
                report = Report("cmsRun")
                report.parse('FrameworkJobReport.xml', "cmsRun")
                try:
                    jobExitCode = report.getExitCode()
                except:
                    jobExitCode = WMex.code
                report = report.__to_json__(None)
                StripReport(report)
                report['jobExitCode'] = jobExitCode
                with open('jobReport.json', 'w') as of:
                    json.dump(report, of)
            except:
                print("WARNING: Failure when trying to parse FJR XML after job failure.  Traceback follows.")
                print(traceback.format_exc())

        handleException("FAILED", WMex.code, exmsg)
        mintime()
        sys.exit(WMex.code)
    except Exception as ex:
        #print "jobExitCode = %s" % jobExitCode
        handleException("FAILED", EC_CMSRunWrapper, "failed to generate cmsRun cfg file at runtime")
        mintime()
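On this failure path the wrapper leaves a jobReport.json on disk containing the stripped FWJR plus the recovered exit code; a purely illustrative sketch of reading it back:

import json

with open('jobReport.json') as handle:
    summary = json.load(handle)
print("job exit code:", summary.get('jobExitCode'))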
Example #12
def parse_fwk_report(data, config, report_filename):
    """Extract task data from a framework report.

    Analyze the CMSSW job framework report to get the CMSSW exit code,
    skipped files, runs and lumis processed on a file basis, total events
    written, and CPU time overall and per event.
    """
    exit_code = 0
    skipped = []
    infos = {}
    written = 0
    eventsPerRun = 0

    report = Report("cmsrun")
    report.parse(report_filename)

    exit_code = report.getExitCode()

    for fn in report.getAllSkippedFiles():
        fn = config['file map'].get(fn, fn)
        skipped.append(fn)

    outinfos = {}
    for file in report.getAllFiles():
        pfn = file['pfn']
        outinfos[pfn] = {
            'runs': {},
            'events': file['events'],
        }
        written += int(file['events'])
        for run in file['runs']:
            try:
                outinfos[pfn]['runs'][run.run].extend(run.lumis)
            except KeyError:
                outinfos[pfn]['runs'][run.run] = run.lumis

    for file in report.getAllInputFiles():
        filename = file['lfn'] if len(file['lfn']) > 0 else file['pfn']
        filename = config['file map'].get(filename, filename)
        file_lumis = []
        try:
            for run in file['runs']:
                for lumi in run.lumis:
                    file_lumis.append((run.run, lumi))
        except AttributeError:
            logger.info('Detected file-based task')
        infos[filename] = (int(file['events']), file_lumis)
        eventsPerRun += infos[filename][0]

    serialized = report.__to_json__(None)
    cputime = float(serialized['steps']['cmsrun']['performance']['cpu'].get('TotalJobCPU', '0'))

    data['files']['info'] = infos
    data['files']['output_info'] = outinfos
    data['files']['skipped'] = skipped
    data['events_written'] = written
    data['exe_exit_code'] = exit_code
    # For efficiency, we care only about the CPU time spent processing
    # events
    data['cpu_time'] = cputime
    data['events_per_run'] = eventsPerRun
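A hypothetical invocation sketch: parse_fwk_report expects data['files'] to already exist and fills in the remaining keys itself; the report path and the empty 'file map' below are assumptions:

data = {'files': {}}
config = {'file map': {}}
parse_fwk_report(data, config, 'FrameworkJobReport.xml')
print(data['exe_exit_code'], data['events_written'], data['cpu_time'])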
Example #13
    except WMExecutionFailure as WMex:
        print(
            "ERROR: Caught WMExecutionFailure - code = %s - name = %s - detail = %s"
            % (WMex.code, WMex.name, WMex.detail))
        exmsg = WMex.name

        # Try to recover what we can from the FJR.  handleException will use this if possible.
        if os.path.exists('FrameworkJobReport.xml'):
            try:
                rep = Report("cmsRun")
                rep.parse('FrameworkJobReport.xml', "cmsRun")
                try:
                    jobExitCode = rep.getExitCode()
                except:
                    jobExitCode = WMex.code
                rep = rep.__to_json__(None)
                #save the virgin WMArchive report
                with open('WMArchiveReport.json', 'w') as of:
                    json.dump(rep, of)
                StripReport(rep)
                rep['jobExitCode'] = jobExitCode
                with open('jobReport.json', 'w') as of:
                    json.dump(rep, of)
            except:
                print(
                    "WARNING: Failure when trying to parse FJR XML after job failure."
                )

        handleException("FAILED", WMex.code, exmsg)
        mintime()
        sys.exit(WMex.code)
Example #14
    except WMExecutionFailure as WMex:
        print(
            "ERROR: Caught WMExecutionFailure - code = %s - name = %s - detail = %s"
            % (WMex.code, WMex.name, WMex.detail))
        exmsg = WMex.name

        # Try to recover what we can from the FJR.  handleException will use this if possible.
        if os.path.exists('FrameworkJobReport.xml'):
            try:
                report = Report("cmsRun")
                report.parse('FrameworkJobReport.xml', "cmsRun")
                try:
                    jobExitCode = report.getExitCode()
                except:
                    jobExitCode = WMex.code
                report = report.__to_json__(None)
                StripReport(report)
                report['jobExitCode'] = jobExitCode
                with open('jobReport.json', 'w') as of:
                    json.dump(report, of)
            except:
                print(
                    "WARNING: Failure when trying to parse FJR XML after job failure.  Traceback follows."
                )
                print(traceback.format_exc())

        handleException("FAILED", WMex.code, exmsg)
        mintime()
        sys.exit(WMex.code)
    except Exception as ex:
        #print "jobExitCode = %s" % jobExitCode
def main():
    """
    _main_

    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    ### Fetch the report pickle files from the component log
    command = ["tail", "-n1000", "install/wmagent/JobAccountant/ComponentLog"]
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    out, err = p.communicate()
    logFiles = [line for line in out.splitlines() if 'install/wmagent/JobCreator/JobCache' in line]
    logFiles = [i.split()[2] for i in logFiles]
    msg = "Found %d pickle files to parse " % len(logFiles)

    ### Now unpickle each of these files and get their output files
    # also check whether any of them are duplicate
    lfn2PklDict = {}
    dupOutputPkl = {}  # string value with the dup LFN and keyed by the pickle file path
    jobReport = Report()
    for pklPath in logFiles:
        if not os.path.exists(pklPath):
            continue

        jobReport.load(pklPath)
        for e in jobReport.getAllFiles():
            lfn2PklDict.setdefault(e['lfn'], [])
            lfn2PklDict[e['lfn']].append(pklPath)

    # now check which files contain more than one pickle path (= created by diff jobs)
    dupFiles = []
    for lfn, pkls in lfn2PklDict.items():
        if len(pkls) > 1:
            dupFiles.append(lfn)
            for pkl in pkls:
                if pkl not in dupOutputPkl:
                    jobReport.load(pkl)
                    dupOutputPkl[pkl] = jobReport.__to_json__(None)
                    dupOutputPkl[pkl]['dup_lfns'] = []
                dupOutputPkl[pkl]['dup_lfns'].append(lfn)

    msg += "with a total of %d output files and %d duplicated" % (len(lfn2PklDict), len(dupFiles))
    msg += " files to process among them."
    msg += "\nDuplicate files are:\n%s" % dupFiles
    print(msg)

    if dupFiles:
        print("See dupPickles.json for further details ...")
        with open('dupPickles.json', 'w') as fo:
            json.dump(dupOutputPkl, fo, indent=2)

    if dupFiles:
        var = input("Can we automatically delete those pickle files? Y/N\n")
        if var == "Y":
            # then delete all job report files but the first one - NOT ideal
            for fname in dupFiles:
                for pklFile in lfn2PklDict[fname][1:]:
                    if os.path.isfile(pklFile):
                        print("Deleting %s ..." % pklFile)
                        os.remove(pklFile)
                    else:
                        print("    File has probably been already deleted %s ..." % pklFile)
            print("  Done!")

    ### Time to load all - this is BAD - LFNs from WMBS database
    print("\nNow loading all LFNs from wmbs_file_details ...")
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)
    output = myThread.transaction.processData("SELECT lfn FROM wmbs_file_details")
    lfnsDB = formatter.format(output)
    lfnsDB = [item[0] for item in lfnsDB]
    print("Retrieved %d lfns from wmbs_file_details" % len(lfnsDB))

    ### Compare what are the duplicates
    dupFiles = list(set(lfn2PklDict.keys()) & set(lfnsDB))
    print("\nFound %d duplicate files." % len(dupFiles))
    if len(dupFiles) == 0:
        sys.exit(0)

    ### Print some basic data about these reports
    print("Their overview is: ")
    dbDupPkl = []
    for fname in dupFiles:
        for pklPath in lfn2PklDict[fname]:
            jobInfo = {'lfn': fname}
            jobInfo['pklPath'] = pklPath

            jobReport.load(pklPath)
            jobInfo['exitCode'] = jobReport.getExitCode()
            jobInfo['taskSuccess'] = jobReport.taskSuccessful()
            jobInfo['EOSLogURL'] = jobReport.getLogURL()
            jobInfo['HostName'] = jobReport.getWorkerNodeInfo()['HostName']
            jobInfo['Site'] = jobReport.getSiteName()
            jobInfo['task'] = jobReport.getTaskName()

            dbDupPkl.append(jobInfo)

    print(pformat(dbDupPkl))
    print("")

    print("Remove them, restart the component and be happy!\n")
    sys.exit(0)
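The snippet defines main() but omits the entry point; presumably the script is run standalone on the agent node, along the lines of:

if __name__ == "__main__":
    main()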
Example #16
        print("==== CMSSW Stack Execution FINISHED at %s ====" % time.asctime(time.gmtime()))
        logCMSSW()
    except WMExecutionFailure as WMex:
        print("ERROR: Caught WMExecutionFailure - code = %s - name = %s - detail = %s" % (WMex.code, WMex.name, WMex.detail))
        exmsg = WMex.name

        # Try to recover what we can from the FJR.  handleException will use this if possible.
        if os.path.exists('FrameworkJobReport.xml'):
            try:
                rep = Report("cmsRun")
                rep.parse('FrameworkJobReport.xml', "cmsRun")
                try:
                    jobExitCode = rep.getExitCode()
                except:
                    jobExitCode = WMex.code
                rep = rep.__to_json__(None)
                #save the virgin WMArchive report
                with open('WMArchiveReport.json', 'w') as of:
                    json.dump(rep, of)
                StripReport(rep)
                rep['jobExitCode'] = jobExitCode
                with open('jobReport.json', 'w') as of:
                    json.dump(rep, of)
            except:
                print("WARNING: Failure when trying to parse FJR XML after job failure.")

        handleException("FAILED", WMex.code, exmsg)
        mintime()
        sys.exit(WMex.code)
    except Exception as ex:
        #print "jobExitCode = %s" % jobExitCode