def testNoLocationFile(self):
    """
    _testNoLocationFile_

    Check how we deal with output files without a valid location
    """
    jobReport = Report()
    jobReport.load(self.noLocationReport)

    fileList = jobReport.getAllFiles()
    self.assertEqual(len(fileList), 2)
    self.assertItemsEqual(fileList[0]['locations'], {})
    self.assertEqual(fileList[0]['outputModule'], "RAWSIMoutput")
    self.assertItemsEqual(fileList[1]['locations'], {"T2_CH_CSCS"})
    self.assertEqual(fileList[1]['outputModule'], "logArchive")
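
# A standalone sketch of the same check outside of unittest: load a pickled
# job report and print each output file's module and locations. The helper
# name and report path are hypothetical; only Report.load()/getAllFiles() and
# the 'outputModule'/'locations' keys exercised by the test are assumed.
from WMCore.FwkJobReport.Report import Report

def summarizeLocations(reportPath='Report.0.pkl'):
    """Print the output module and locations of every file in a job report."""
    report = Report()
    report.load(reportPath)
    for outputFile in report.getAllFiles():
        print("%s -> %s" % (outputFile['outputModule'], sorted(outputFile['locations'])))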
def parse_fwk_report(data, config, report_filename):
    """Extract task data from a framework report.

    Analyze the CMSSW job framework report to get the CMSSW exit code,
    skipped files, runs and lumis processed on a file basis, total events
    written, and CPU time overall and per event.
    """
    exit_code = 0
    skipped = []
    infos = {}
    written = 0
    eventsPerRun = 0

    report = Report("cmsrun")
    report.parse(report_filename)

    exit_code = report.getExitCode()

    for fn in report.getAllSkippedFiles():
        fn = config['file map'].get(fn, fn)
        skipped.append(fn)

    outinfos = {}
    for file in report.getAllFiles():
        pfn = file['pfn']
        outinfos[pfn] = {
            'runs': {},
            'events': file['events'],
        }
        written += int(file['events'])
        for run in file['runs']:
            try:
                outinfos[pfn]['runs'][run.run].extend(run.lumis)
            except KeyError:
                outinfos[pfn]['runs'][run.run] = run.lumis

    for file in report.getAllInputFiles():
        filename = file['lfn'] if len(file['lfn']) > 0 else file['pfn']
        filename = config['file map'].get(filename, filename)
        file_lumis = []
        try:
            for run in file['runs']:
                for lumi in run.lumis:
                    file_lumis.append((run.run, lumi))
        except AttributeError:
            logger.info('Detected file-based task')
        infos[filename] = (int(file['events']), file_lumis)
        eventsPerRun += infos[filename][0]

    serialized = report.__to_json__(None)
    cputime = float(serialized['steps']['cmsrun']['performance']['cpu'].get('TotalJobCPU', '0'))

    data['files']['info'] = infos
    data['files']['output_info'] = outinfos
    data['files']['skipped'] = skipped
    data['events_written'] = written
    data['exe_exit_code'] = exit_code
    # For efficiency, we care only about the CPU time spent processing
    # events
    data['cpu_time'] = cputime
    data['events_per_run'] = eventsPerRun
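
# A minimal invocation sketch for parse_fwk_report(); the wrapper name and the
# report path are hypothetical. The function expects a module-level 'logger'
# (set up here) and a 'data' dict that already contains a 'files' dict; the
# 'file map' may be empty when no LFN-to-local-path remapping is needed.
import logging

logger = logging.getLogger(__name__)

def example_parse(report_filename='report.xml'):
    data = {'files': {}}
    config = {'file map': {}}
    parse_fwk_report(data, config, report_filename)
    print("exit code: %s, events written: %d"
          % (data['exe_exit_code'], data['events_written']))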
import json
import logging
import os
import subprocess
import sys
import threading
from pprint import pformat

from WMCore.Database.DBFormatter import DBFormatter
from WMCore.FwkJobReport.Report import Report
from WMCore.WMInit import connectToDB


def main():
    """
    _main_
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    ### Fetch the report pickle files from the component log
    command = ["tail", "-n1000", "install/wmagent/JobAccountant/ComponentLog"]
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    logFiles = [line for line in out.splitlines() if 'install/wmagent/JobCreator/JobCache' in line]
    logFiles = [i.split()[2] for i in logFiles]
    msg = "Found %d pickle files to parse " % len(logFiles)

    ### Now unpickle each of these files and get their output files
    # also check whether any of them are duplicate
    lfn2PklDict = {}
    dupOutputPkl = {}  # string value with the dup LFN and keyed by the pickle file path
    jobReport = Report()
    for pklPath in logFiles:
        if not os.path.exists(pklPath):
            continue
        jobReport.load(pklPath)
        for e in jobReport.getAllFiles():
            lfn2PklDict.setdefault(e['lfn'], [])
            lfn2PklDict[e['lfn']].append(pklPath)

    # now check which files contain more than one pickle path (= created by diff jobs)
    dupFiles = []
    for lfn, pkls in lfn2PklDict.iteritems():
        if len(pkls) > 1:
            dupFiles.append(lfn)
            for pkl in pkls:
                if pkl not in dupOutputPkl:
                    jobReport.load(pkl)
                    dupOutputPkl[pkl] = jobReport.__to_json__(None)
                    dupOutputPkl[pkl]['dup_lfns'] = []
                dupOutputPkl[pkl]['dup_lfns'].append(lfn)

    msg += "with a total of %d output files and %d duplicated" % (len(lfn2PklDict), len(dupFiles))
    msg += " files to process among them."
    msg += "\nDuplicate files are:\n%s" % dupFiles
    print(msg)

    if dupFiles:
        print("See dupPickles.json for further details ...")
        with open('dupPickles.json', 'w') as fo:
            json.dump(dupOutputPkl, fo, indent=2)

    if dupFiles:
        var = raw_input("Can we automatically delete those pickle files? Y/N\n")
        if var == "Y":
            # then delete all job report files but the first one - NOT ideal
            for fname in dupFiles:
                for pklFile in lfn2PklDict[fname][1:]:
                    if os.path.isfile(pklFile):
                        print("Deleting %s ..." % pklFile)
                        os.remove(pklFile)
                    else:
                        print(" File has probably already been deleted %s ..." % pklFile)
            print(" Done!")

    ### Time to load all LFNs from the WMBS database - this is BAD
    print("\nNow loading all LFNs from wmbs_file_details ...")
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)
    output = myThread.transaction.processData("SELECT lfn FROM wmbs_file_details")
    lfnsDB = formatter.format(output)
    lfnsDB = [item[0] for item in lfnsDB]
    print("Retrieved %d lfns from wmbs_file_details" % len(lfnsDB))

    ### Compare them to find the duplicates
    dupFiles = list(set(lfn2PklDict.keys()) & set(lfnsDB))
    print("\nFound %d duplicate files." % len(dupFiles))
    if len(dupFiles) == 0:
        sys.exit(0)

    ### Print some basic data about these reports
    print("Their overview is:")
    dbDupPkl = []
    for fname in dupFiles:
        for pklPath in lfn2PklDict[fname]:
            jobInfo = {'lfn': fname}
            jobInfo['pklPath'] = pklPath
            jobReport.load(pklPath)
            jobInfo['exitCode'] = jobReport.getExitCode()
            jobInfo['taskSuccess'] = jobReport.taskSuccessful()
            jobInfo['EOSLogURL'] = jobReport.getLogURL()
            jobInfo['HostName'] = jobReport.getWorkerNodeInfo()['HostName']
            jobInfo['Site'] = jobReport.getSiteName()
            jobInfo['task'] = jobReport.getTaskName()
            dbDupPkl.append(jobInfo)

    print(pformat(dbDupPkl))
    print("")
    print("Remove them, restart the component and be happy!\n")
    sys.exit(0)
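
# Conventional entry point for running the cleanup script standalone on the
# agent node (assumed; the original excerpt ends at main()):
if __name__ == "__main__":
    main()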