Example #1
0
    def testNoLocationFile(self):
        """
        _testNoLocationFile_

        Load the report stored in self.noLocationReport and verify that it
        yields two files: one from RAWSIMoutput with no locations at all and
        one from logArchive located at T2_CH_CSCS
        """
        report = Report()
        report.load(self.noLocationReport)
        allFiles = report.getAllFiles()
        self.assertEqual(len(allFiles), 2)

        # (output module, expected locations), in the order the files are returned
        expected = [("RAWSIMoutput", {}), ("logArchive", {"T2_CH_CSCS"})]
        for idx, (module, locations) in enumerate(expected):
            self.assertItemsEqual(allFiles[idx]['locations'], locations)
            self.assertEqual(allFiles[idx]['outputModule'], module)
Example #2
0
def parse_fwk_report(data, config, report_filename):
    """Extract task data from a framework report.

    Parse the CMSSW job framework report and store the results in `data`
    (modified in place):

    * ``data['files']['info']``: maps each input file name to a tuple
      ``(events, [(run, lumi), ...])``.
    * ``data['files']['output_info']``: maps each output PFN to a dict with
      its ``'events'`` and a ``'runs'`` mapping of run number to lumis.
    * ``data['files']['skipped']``: list of skipped file names.
    * ``data['events_written']``: sum of events over all output files.
    * ``data['exe_exit_code']``: exit code obtained from the report.
    * ``data['cpu_time']``: the ``TotalJobCPU`` performance value of the
      ``cmsrun`` step (0 when missing).
    * ``data['events_per_run']``: sum of events over all input files.

    Input and skipped file names are translated through
    ``config['file map']``; names without an entry are kept unchanged.

    Args:
        data: dictionary to update; must already contain a ``'files'`` dict.
        config: dictionary providing the ``'file map'`` mapping.
        report_filename: path of the framework report to parse.
    """
    report = Report("cmsrun")
    report.parse(report_filename)

    exit_code = report.getExitCode()

    file_map = config['file map']
    skipped = [file_map.get(fn, fn) for fn in report.getAllSkippedFiles()]

    written = 0
    outinfos = {}
    for outfile in report.getAllFiles():
        runs = {}
        outinfos[outfile['pfn']] = {
            'runs': runs,
            'events': outfile['events'],
        }
        written += int(outfile['events'])
        for run in outfile['runs']:
            # Accumulate into a list owned by this function: extending
            # run.lumis itself would modify the Run object in place.
            runs.setdefault(run.run, []).extend(run.lumis)

    infos = {}
    events_read = 0
    for infile in report.getAllInputFiles():
        # Fall back to the PFN when the LFN is empty or missing
        filename = infile['lfn'] or infile['pfn']
        filename = file_map.get(filename, filename)
        file_lumis = []
        try:
            for run in infile['runs']:
                for lumi in run.lumis:
                    file_lumis.append((run.run, lumi))
        except AttributeError:
            # Run entries without run/lumi attributes; keep whatever was
            # collected so far
            logger.info('Detected file-based task')
        events = int(infile['events'])
        infos[filename] = (events, file_lumis)
        events_read += events

    serialized = report.__to_json__(None)
    cputime = float(serialized['steps']['cmsrun']['performance']['cpu'].get('TotalJobCPU', '0'))

    data['files']['info'] = infos
    data['files']['output_info'] = outinfos
    data['files']['skipped'] = skipped
    data['events_written'] = written
    data['exe_exit_code'] = exit_code
    # For efficiency, we care only about the CPU time spent processing
    # events
    data['cpu_time'] = cputime
    data['events_per_run'] = events_read
def main():
    """
    _main_

    Look for job report pickle files whose output LFNs are duplicated,
    either among the pickle files themselves or against the LFNs already
    present in the wmbs_file_details table.

    Steps:
      1. Read the last 1000 lines of the JobAccountant ComponentLog and
         collect the pickle paths (third token of every line mentioning
         the JobCache directory).
      2. Load every existing pickle and map each output LFN to the pickle
         files reporting it.
      3. Report LFNs found in more than one pickle, dump the details to
         dupPickles.json and optionally delete all but the first pickle
         of each duplicated LFN.
      4. Compare the LFNs against wmbs_file_details and print an overview
         of the job reports whose LFNs are already in the database.

    Always terminates through sys.exit(0) once the database comparison
    is done.
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    ### Fetch the report pickle files from the component log
    command = ["tail", "-n1000", "install/wmagent/JobAccountant/ComponentLog"]
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, _ = p.communicate()

    # The same pickle can show up on several log lines; keep each path only
    # once (in order of appearance), otherwise a single job report would be
    # mistaken for several jobs writing the same LFN.
    logFiles = []
    seenPaths = set()
    for line in out.splitlines():
        if 'install/wmagent/JobCreator/JobCache' not in line:
            continue
        tokens = line.split()
        if len(tokens) < 3:
            # not enough tokens to hold a pickle path, nothing to extract
            continue
        if tokens[2] not in seenPaths:
            seenPaths.add(tokens[2])
            logFiles.append(tokens[2])
    msg = "Found %d pickle files to parse " % len(logFiles)

    ### Now unpickle each of these files and get their output files
    # also check whether any of them are duplicate
    lfn2PklDict = {}
    dupOutputPkl = {}  # string value with the dup LFN and keyed by the pickle file path
    jobReport = Report()
    for pklPath in logFiles:
        if not os.path.exists(pklPath):
            continue

        jobReport.load(pklPath)
        for e in jobReport.getAllFiles():
            pkls = lfn2PklDict.setdefault(e['lfn'], [])
            # a report listing the same LFN twice still counts as one job
            if pklPath not in pkls:
                pkls.append(pklPath)

    # now check which files contain more than one pickle path (= created by diff jobs)
    dupFiles = []
    for lfn, pkls in lfn2PklDict.items():
        if len(pkls) > 1:
            dupFiles.append(lfn)
            for pkl in pkls:
                if pkl not in dupOutputPkl:
                    jobReport.load(pkl)
                    dupOutputPkl[pkl] = jobReport.__to_json__(None)
                    dupOutputPkl[pkl]['dup_lfns'] = []
                dupOutputPkl[pkl]['dup_lfns'].append(lfn)

    msg += "with a total of %d output files and %d duplicated" % (len(lfn2PklDict), len(dupFiles))
    msg += " files to process among them."
    msg += "\nDuplicate files are:\n%s" % dupFiles
    print(msg)

    if dupFiles:
        print("See dupPickles.json for further details ...")
        with open('dupPickles.json', 'w') as fo:
            json.dump(dupOutputPkl, fo, indent=2)

        var = raw_input("Can we automatically delete those pickle files? Y/N\n")
        if var == "Y":
            # then delete all job report files but the first one - NOT ideal
            for fname in dupFiles:
                for pklFile in lfn2PklDict[fname][1:]:
                    if os.path.isfile(pklFile):
                        print("Deleting %s ..." % pklFile)
                        os.remove(pklFile)
                    else:
                        print("    File has probably been already deleted %s ..." % pklFile)
            print("  Done!")

    ### Time to load all - this is BAD - LFNs from WMBS database
    print("\nNow loading all LFNs from wmbs_file_details ...")
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)
    output = myThread.transaction.processData("SELECT lfn FROM wmbs_file_details")
    lfnsDB = formatter.format(output)
    lfnsDB = [item[0] for item in lfnsDB]
    print("Retrieved %d lfns from wmbs_file_details" % len(lfnsDB))

    ### Compare what are the duplicates
    dupFiles = list(set(lfn2PklDict) & set(lfnsDB))
    print("\nFound %d duplicate files." % len(dupFiles))
    if not dupFiles:
        sys.exit(0)

    ### Print some basic data about these reports
    print("Their overview is: ")
    dbDupPkl = []
    for fname in dupFiles:
        for pklPath in lfn2PklDict[fname]:
            if not os.path.exists(pklPath):
                # pickle removed by the clean-up step above, cannot be loaded
                continue

            jobReport.load(pklPath)
            jobInfo = {'lfn': fname, 'pklPath': pklPath}
            jobInfo['exitCode'] = jobReport.getExitCode()
            jobInfo['taskSuccess'] = jobReport.taskSuccessful()
            jobInfo['EOSLogURL'] = jobReport.getLogURL()
            jobInfo['HostName'] = jobReport.getWorkerNodeInfo()['HostName']
            jobInfo['Site'] = jobReport.getSiteName()
            jobInfo['task'] = jobReport.getTaskName()

            dbDupPkl.append(jobInfo)

    print(pformat(dbDupPkl))
    print("")

    print("Remove them, restart the component and be happy!\n")
    sys.exit(0)