Esempio n. 1
0
def _guess_file_type(fname, msg):
    """Guess the type of an input file (bs, rdo, esd, aod, ...).

    Returns the guessed type string, or None when the file type is
    unknown.  Exits (SystemExit(1)) when a POOL file carries no
    stream names at all.
    """
    import PyUtils.AthFile as af
    try:
        ftype, resolved_name = af.ftype(fname)
    except Exception:
        raise  # for now
    # bytestream files map trivially onto their own type
    if ftype == 'bs':
        return 'bs'
    if ftype != 'pool':
        msg.error('unknown file type (%s) for file [%s]', ftype, resolved_name)
        return None
    # POOL file: infer the type from the (lower-cased) stream name
    import PyUtils.PoolFile as pf
    streams = [s.lower() for s in pf.extract_stream_names(fname)]
    if len(streams) > 1:
        msg.warning('got many stream names: %r', streams)
        msg.warning('only considering the 1st one...')
    elif not streams:
        msg.warning('got an empty list of stream names')
        raise SystemExit(1)
    type_by_stream = {
        'stream1': 'rdo',
        'streamesd': 'esd',
        'streamaod': 'aod',
        # FIXME: TODO: TAG, DPD
    }
    # unrecognized stream names default to 'aod'
    return type_by_stream.get(streams[0], 'aod')
Esempio n. 2
0
def _guess_file_type(fname, msg):
    """Guess the type of an input file (bs, rdo, esd, aod, ...).

    ``fname``: input file name/path; ``msg``: logger for diagnostics.
    Returns the type string, or None when the file type is unknown.
    Raises SystemExit(1) when a POOL file has no stream names.
    """
    input_type = None
    import PyUtils.AthFile as af
    try:
        file_type,file_name = af.ftype(fname)
    except Exception:
        raise # for now
    if file_type == 'bs':
        # bytestream input maps trivially onto its own type
        input_type = 'bs'
    elif file_type == 'pool':
        # POOL file: infer the type from the (lower-cased) stream name
        import PyUtils.PoolFile as pf
        stream_names = pf.extract_stream_names(fname)
        stream_names = [s.lower() for s in stream_names]
        if len(stream_names) > 1:
            msg.warning('got many stream names: %r', stream_names)
            msg.warning('only considering the 1st one...')
        elif len(stream_names) <= 0:
            msg.warning('got an empty list of stream names')
            raise SystemExit(1)
        stream_name = stream_names[0]
        # unrecognized stream names fall back to 'aod'
        input_type = {
            'stream1':    'rdo',
            'streamesd' : 'esd',
            'streamaod' : 'aod',
            # FIXME: TODO: TAG, DPD
            }.get(stream_name, 'aod')

    else:
        msg.error('unknown file type (%s) for file [%s]',
                  file_type, file_name)
    return input_type
Esempio n. 3
0
def getInputFileToNumEventMapping(m_list):
    """Return a dict mapping each input file name to its number of events.

    ``m_list``: iterable of POOL file names.  Files on the hard-coded
    black-list below are skipped.  The event count is parsed from the
    7th whitespace-separated token of ``PoolFile.fileInfos()``.
    """
    print(
        "|====================================================================================================================|"
    )
    print(
        "|                                         Doing the mapping from file to number of events                            |"
    )
    print(
        "|====================================================================================================================|"
    )
    m_numEventsPerFile = {}

    import PyUtils.PoolFile as PF

    # files known to be problematic (reason not recorded here); skip them.
    # frozenset gives O(1) membership tests instead of scanning a list.
    failedFiles = frozenset([
        "/castor/cern.ch/grid/atlas/caf/atlcal/perm/id/cosmics/ESDs/InDetESD_91890_lb7.root",
        "/castor/cern.ch/grid/atlas/caf/atlcal/perm/id/cosmics/ESDs/InDetESD_91891_lb13.root",
        "/castor/cern.ch/grid/atlas/caf/atlcal/perm/id/cosmics/ESDs/InDetESD_91891_lb24.root"
    ])

    # iterate the list directly (was: range(m_list.__len__()) with indexing)
    for fname in m_list:
        if fname not in failedFiles:
            poolFile = PF.PoolFile(fname)
            m_numEventsPerFile[fname] = int(poolFile.fileInfos().split()[6])

    print(m_numEventsPerFile)
    return m_numEventsPerFile
Esempio n. 4
0
def main(args):
    """diff two POOL files (containers and sizes)"""

    from os.path import expanduser, expandvars
    # expand ~user and $VAR constructs in both paths
    ref_name = expandvars(expanduser(args.old))
    chk_name = expandvars(expanduser(args.new))

    import PyUtils.PoolFile as PF
    differ = PF.DiffFiles(refFileName=ref_name,
                          chkFileName=chk_name,
                          verbose=args.verbose)
    differ.printSummary()
    return differ.status()
Esempio n. 5
0
    def diff_pool(self, file_name, ref_file):
        """Diff the POOL file ``file_name`` against ``ref_file``.

        Reco-timing containers are ignored (they legitimately differ
        between runs).  Returns the diff status from ``DiffFiles``.
        """
        import PyUtils.PoolFile as PF

        # diff-pool
        df = PF.DiffFiles(refFileName=ref_file, chkFileName=file_name, ignoreList=['RecoTimingObj_p1_RAWtoESD_timings', 'RecoTimingObj_p1_ESDtoAOD_timings'])
        df.printSummary()
        stat = df.status()
        print(stat)  # was the Python-2-only statement `print stat`
        del df

        return stat
Esempio n. 6
0
 def _retrieve_items_from_input():
     """Return ``[clid, key]`` pairs for the items of the first input file."""
     from AthenaCommon.AppMgr import ServiceMgr as svcMgr
     import PyUtils.PoolFile as _pf
     # only the first input file is inspected: that is enough since we
     # don't really support varying schema shapes anymore (ROOT doesn't)
     input_items = []
     first_input = svcMgr.EventSelector.InputCollections[0]
     for type_name, sg_key in _pf.extract_items(first_input):
         clid = _clid_from_string(type_name)
         if clid is None:
             msg.warning('could not infer clid for: "%s"', type_name)
             clid = type_name  # fall back to the original string
         input_items.append([clid, sg_key])
     return input_items
Esempio n. 7
0
def diffPoolFiles(ref,
                  chk,
                  details,
                  # tuple default: the old list default was a shared mutable
                  # object (classic Python pitfall)
                  toIgnore=('RecoTimingObj_p1_RAWtoESD_timings',
                            'RecoTimingObj_p1_ESDtoAOD_timings')):
    """Diff two POOL files, ignoring the containers named in ``toIgnore``.

    ``details``: optional sink passed to ``printSummary`` (None for stdout).
    Returns the ``DiffFiles`` status, or True when the diff itself failed.
    """
    import PyUtils.PoolFile as PF
    try:
        df = PF.DiffFiles(refFileName=ref,
                          chkFileName=chk,
                          ignoreList=list(toIgnore))
        if details is None:
            df.printSummary()
        else:
            df.printSummary(details)
        stat = df.status()
        del df
    except Exception:
        # was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt; treat any failure as "files differ"
        print("Exception caught while diff'ing POOL files")
        stat = True
    return stat
Esempio n. 8
0
def main(args):
    """read a POOL file and dump its content.
    """
    # NOTE(review): Python 2 only (print statements, basestring,
    # `except Exception, e`); the block ends in a dangling bare
    # `except:` -- it appears truncated in this source.
    # accept either a single file name or a list of them
    files = args.files
    if isinstance(files, basestring):
        files = [files]

    import sys
    import os
    import os.path as osp

    # expand ~user and $VAR constructs in-place
    for i, f in enumerate(files):
        files[i] = osp.expandvars(osp.expanduser(f))

    exitcode = 0
    for fname in files:
        try:
            import PyUtils.PoolFile as PF
            PF.PoolOpts.FAST_MODE = args.fast
            pool_file = PF.PoolFile(fname)
            pool_file.checkFile(sorting=args.sort_fct)
            if args.detailed_dump:
                dump_file = osp.basename(fname) + '.txt'
                print "## dumping details into [%s]" % (dump_file, )
                pool_file.detailedDump(dump_file)
            if args.output:
                oname = args.output
                print "## saving report into [%s]..." % (oname, )
                pool_file.saveReport(oname)
        except Exception, e:
            # report the failure but keep processing the remaining files
            print "## Caught exception [%s] !!" % str(e.__class__)
            print "## What:", e
            print sys.exc_info()[0]
            print sys.exc_info()[1]
            exitcode = 1
            pass

        except:  # (truncated here in the original source)
Esempio n. 9
0
                      action="store_true",
                      dest="strict",
                      default=False,
                      help="Compare both memSize and diskSize")

    # parse the command line; the two file names may also be given as
    # positional arguments (anything not starting with '-')
    (options, args) = parser.parse_args()

    if len(args) > 0 and args[0][0] != "-":
        options.refFileName = args[0]
        pass
    if len(args) > 1 and args[1][0] != "-":
        options.fileName = args[1]
        pass

    # both a reference and a check file are required
    if options.fileName is None or options.refFileName is None:
        str(parser.print_help() or "")
        sys.exit(1)
        pass

    # expand ~user and $VAR constructs in the given paths
    chkFileName = os.path.expandvars(os.path.expanduser(options.fileName))
    refFileName = os.path.expandvars(os.path.expanduser(options.refFileName))

    import PyUtils.PoolFile as PF
    diff = PF.DiffFiles(refFileName=refFileName,
                        chkFileName=chkFileName,
                        verbose=options.verbose,
                        strict=options.strict)

    diff.printSummary()
    # the process exit code reflects whether the two files match
    sys.exit(diff.status())
Esempio n. 10
0
        pass

    # de-duplicate the input file names
    fileNames = set(fileNames)
    # Check the consistency with the CSV output:
    if len(fileNames) > 1 and options.csvFileName:
        print(
            "WARNING  CSV output is only available when processing a single "
            "input file")
        pass

    # Loop over the specified file(s):
    for fileName in fileNames:

        # Open the file:
        import PyUtils.PoolFile as PF
        poolFile = PF.PoolFile(fileName)

        # Loop over all the branches of the file, and sum up the information
        # about them in a smart way...
        summedData = {}
        categData = {}
        categTrigData = {}
        categTrigDynVars = {}
        for d in poolFile.data:
            # Skip metadata/TAG/etc. branches:
            # if d.dirType != "B": continue
            # The name of this branch:
            brName = d.name
            # Check if this is a static auxiliary store:
            # (matches e.g. "ElectronsAux." -- container name + "Aux." suffix)
            m = re.match("(.*)Aux\..*", d.name)
            if m:  # (truncated here in the original source)
Esempio n. 11
0
    def __init__(self,
                 TOTALCPUS,
                 LOCALDIR,
                 FILELIST,
                 OutputLevel,
                 doDetailedSplitting=False,
                 nEventsPerFile=-1):
        """Distribute the files listed in FILELIST over TOTALCPUS sub-jobs.

        Default mode: whole files are assigned to CPUs, balanced by file
        size when a size column is available (castor / `ls -l` listings).
        Detailed mode (doDetailedSplitting=True): the number of events per
        file is read via PyUtils.PoolFile and events are split evenly,
        recording per-sub-job skip-event offsets.

        NOTE(review): `nFiles / TOTALCPUS` (and similar) rely on integer
        division; under Python 3 they yield floats and subtly change the
        file distribution -- confirm `//` was intended before porting.
        """
        def sort_by_value(d):
            """ Returns the keys of dictionary d sorted by their values """
            items = d.items()
            backitems = [[v[1], v[0]] for v in items]
            backitems.sort()
            backitems.reverse()
            return [backitems[i][1] for i in range(0, len(backitems))]

        self.doDetailedSplitting = doDetailedSplitting
        self.OutputLevel = OutputLevel
        self.Files = FILELIST
        self.LocalDir = LOCALDIR
        self.nSubJobs = TOTALCPUS
        self.totalNumberOfEvents = 0
        self.totalNumberOfFiles = 0
        self.nEventsPerFile = nEventsPerFile
        inputFileList = []

        if not self.doDetailedSplitting:
            # read the file list, one file name per line
            inputfiles = open(FILELIST, "r")
            filelist = inputfiles.read().split('\n')
            for i in range(0, len(filelist)):
                filelist[i] = filelist[i].rstrip()
            inputfiles.close()
            if not LOCALDIR:

                print("Reading Custom File")
                FinalListSorted = []
                for line in filelist:
                    if line and line[0] != '#':
                        FinalListSorted.append(line)

                #print (FinalListSorted)

            elif "castor" in LOCALDIR:
                print("Reading castor directory " + LOCALDIR +
                      " Please wait...")
                extendedFileList = os.popen("rfdir " +
                                            LOCALDIR[7:]).read().splitlines()
            else:
                print("Reading directory " + LOCALDIR + " Please wait...")
                extendedFileList = os.popen("ls -l " +
                                            LOCALDIR).read().splitlines()

            if LOCALDIR:
                # columns 8 / 4 of the long listing are file name / size
                i = 0
                SizeList = {}
                for line in extendedFileList:
                    curr = line.split()
                    if curr[0] != 'total':
                        #print (curr[8], " ", curr[4])
                        SizeList[i] = {}
                        SizeList[i][0] = curr[8].rstrip()
                        SizeList[i][1] = curr[4].rstrip()
                        i = i + 1
                FinalList = {}
                count = 0

                # keep only the listed files that also appear in FILELIST
                for i in range(0, len(SizeList)):
                    #print (SizeList[i][0])
                    if SizeList[i][0] in filelist:
                        #print ("Accepted")
                        #print (SizeList[i][0], " size:", SizeList[i][1])
                        FinalList[SizeList[i][0]] = int(SizeList[i][1])

                #SizeListSorted = [ (k,SizeList[k]) for k in sorted(SizeList.values())]

                FinalListSorted = sort_by_value(FinalList)
                #print ("Sorted list" )
                #for i in range(0,len(FinalListSorted)):
                #   print (FinalListSorted[i], "\tsize:\t", FinalList[FinalListSorted[i]])
            currCPU = 0
            self.CPUsFiles = {}
            nFiles = len(FinalListSorted)
            nRemainder = nFiles % TOTALCPUS
            nFilesPerCpu = nFiles / TOTALCPUS
            nFilesForThisCpu = 1
            sumFileSize = 0
            # entries of the form "<name> <size>" carry a size column
            if len(FinalListSorted[0].split()) == 2:
                for i in range(0, nFiles):
                    sumFileSize += int(FinalListSorted[i].split()[1])
            print(sumFileSize)
            # aim slightly below the exact average to leave some head-room
            averageSizePerCpu = sumFileSize / TOTALCPUS * 0.97
            print(averageSizePerCpu)
            sumSizeOnCpu = 0
            #print ("NFile, Remainder, NfilesperCpu  ",  nFiles, " ", nRemainder, " ", nFilesPerCpu)
            #If the file size is present then use it to split the files
            if sumFileSize != 0:
                for i in range(0, nFiles):
                    if currCPU in self.CPUsFiles:
                        self.CPUsFiles[currCPU].append(
                            LOCALDIR + FinalListSorted[i].split()[0])
                    else:
                        self.CPUsFiles[currCPU] = [
                            LOCALDIR + FinalListSorted[i].split()[0]
                        ]
                        extraFiles = 0
                    sumSizeOnCpu += int(FinalListSorted[i].split()[1])
                    # advance to the next CPU once this one is "full enough",
                    # unless we are on the last CPU already
                    if (sumSizeOnCpu > averageSizePerCpu and i < nFiles - 1 and
                        (sumSizeOnCpu + int(FinalListSorted[i + 1].split()[1]))
                            > averageSizePerCpu * 1.04):
                        print("File size on CPU: ", currCPU, '\t',
                              sumSizeOnCpu)
                        currCPU = currCPU + 1
                        if currCPU >= TOTALCPUS:
                            currCPU = TOTALCPUS - 1
                        else:
                            sumSizeOnCpu = 0
                    elif (nFiles - i == TOTALCPUS - currCPU):
                        # as many files left as CPUs: one file per CPU
                        currCPU = currCPU + 1
                print("File size on CPU: ", currCPU, '\t', sumSizeOnCpu)
            else:
                # no size information: split by file count instead
                for i in range(0, nFiles):
                    #print (FinalListSorted[i], "CPU: ", currCPU, " FPCPU: " , nFilesForThisCpu)
                    if currCPU in self.CPUsFiles:
                        self.CPUsFiles[currCPU].append(
                            LOCALDIR + FinalListSorted[i].split()[0])
                    else:
                        self.CPUsFiles[currCPU] = [
                            LOCALDIR + FinalListSorted[i].split()[0]
                        ]
                    extraFiles = 0
                    if (currCPU < nRemainder):
                        extraFiles = 1
                    if (nFilesForThisCpu < nFilesPerCpu + extraFiles):
                        nFilesForThisCpu = nFilesForThisCpu + 1
                    else:
                        currCPU = currCPU + 1
                        nFilesForThisCpu = 1

        # Doing the Detailed slitting
        else:
            import PyUtils.PoolFile as PF

            #Getting the number of events in each file.
            inputfiles = open(FILELIST, "r")
            numEventsPerFile = {}
            print("==================================================")
            print("The input file are: (May take some time..)")
            for line in inputfiles:
                if line.rstrip().find(".root") > 0:

                    fullFileName = self.LocalDir + "/" + line.rstrip()

                    inputFileList.append(fullFileName)

                    poolFile = PF.PoolFile(fullFileName)
                    thisNumEvents = int(poolFile.dataHeader.nEntries)

                    self.totalNumberOfEvents += thisNumEvents

                    self.totalNumberOfFiles += 1

                    print(fullFileName, " with ", thisNumEvents, " events")

                    numEventsPerFile[fullFileName] = thisNumEvents
            print("==================================================")

            #Getting the Number of events to process, to skip, and the inputFile

            # The relavent quantities for each subJob
            self.m_skipEvents = {}
            self.nEvents = 0
            self.m_inputFiles = {}

            # This means will do all the events
            if self.nEventsPerFile == -1:
                self.nEvents = int(self.totalNumberOfEvents / self.nSubJobs)
            else:
                self.nEvents = self.nEventsPerFile

            # local counters
            m_usedFiles = 0
            m_numberEventsUsed = 0
            for subJob in range(self.nSubJobs):
                self.m_inputFiles[subJob] = []
                m_eventsNeeded = self.nEvents

                while (m_eventsNeeded != 0
                       and m_usedFiles < self.totalNumberOfFiles):
                    # Two case the file indexed by m_usedFiles has enough event to complete
                    #  the events needed or it doesn't

                    # If it does
                    if m_eventsNeeded <= numEventsPerFile[
                            inputFileList[m_usedFiles]]:
                        numEventsPerFile[
                            inputFileList[m_usedFiles]] -= m_eventsNeeded

                        # Debugging
                        #print ("subJob",subJob)
                        #print ("m_eventsNeeded",m_eventsNeeded)
                        #print ("self.m_inputFiles",self.m_inputFiles)
                        #print ("m_usedFiles",m_usedFiles)

                        self.m_inputFiles[subJob].append(
                            inputFileList[m_usedFiles])
                        self.m_skipEvents[subJob] = m_numberEventsUsed
                        m_numberEventsUsed += m_eventsNeeded
                        m_eventsNeeded = 0
                        print("self.m_skipEvents[" + str(subJob) + "]",
                              self.m_skipEvents[subJob])
                        print("m_numberEventsUsed", m_numberEventsUsed)

                    # If it doesn't
                    else:
                        m_eventsNeeded -= numEventsPerFile[
                            inputFileList[m_usedFiles]]
                        self.m_skipEvents[subJob] = m_numberEventsUsed
                        self.m_inputFiles[subJob].append(
                            inputFileList[m_usedFiles])
                        m_usedFiles += 1
                        m_numberEventsUsed = 0
Esempio n. 12
0
for Path in paths:
    # NOTE(review): Python 2 print statements throughout this block.
    if "castor" in Path:
        print "Reading castor directory. Please wait..."
        inputfiles = os.popen("rfdir " + Path).read().splitlines()
    else:
        print "Reading directory. Please wait..."
        inputfiles = os.popen("ls -l " + Path).read().splitlines()

    for line in inputfiles:
        # 9th column of the `ls -l` / `rfdir` output is the file name
        filename = line.split()[8]
        if "root" in filename or "ESD" in filename or "data" in filename:
            fullFilename = Path + filename
            try:
                if ReadNEvents:
                    # open the POOL file to count its events
                    poolFile = PF.PoolFile(fullFilename)
                    numEvents = int(poolFile.dataHeader.nEntries)
                    print filename
                    print numEvents

                    # only keep files with more than EventCut events
                    if numEvents > EventCut:
                        outputText.write(fullFilename + '\t' + str(numEvents) +
                                         '\n')
                    else:
                        print "File with few events, skipping..."
                else:
                    if not PoolFileCatalog:
                        outputText.write(fullFilename + '\n')
                    if PoolFileCatalog:
                        print " Creating as PoolFileToCatalogue Please wait..."
                        print "  pool_insertFileToCatalog ", fullFilename
Esempio n. 13
0
    def finalize(self):
        """Collect per-container size information from the input POOL files
        (via an external ``checkFile.py`` run) and store it in the
        monitoring service's meta-data dictionary.

        NOTE(review): Python 2 only (``import commands``,
        ``except Exception, err``).
        """
        import sys
        import PyUtils.PoolFile as PF

        class ShutUp:
            # stdout silencer at the file-descriptor level:
            # mute() redirects fd 1 to /dev/null, unMute() restores it
            def __init__(self):
                self.save = os.dup(sys.stdout.fileno())
                self.quiet = open("/dev/null", "w")
                return

            def mute(self):
                os.dup2(self.quiet.fileno(), sys.stdout.fileno())
                return

            def unMute(self):
                os.dup2(self.save, sys.stdout.fileno())
                return

        def _unfold(pr):
            # flatten a PoolRecord-like object into a plain dict
            return {
                'name': pr.name,
                'memSize': pr.memSize,
                'diskSize': pr.diskSize,
                'memSizeNoZip': pr.memSizeNoZip,
                'nEntries': pr.nEntries,
                'dirType': pr.dirType,
                'details': pr.details,
            }

        # scratch file for checkFile.py output, removed at interpreter exit
        import tempfile, atexit, os
        tmpfileno, tmpfile = tempfile.mkstemp(suffix='.py_shelve.dat')
        os.close(tmpfileno)
        atexit.register(os.unlink, tmpfile)

        import commands
        sc, checkfile = commands.getstatusoutput('which checkFile.py')
        if sc != 0:
            self.msg.info('could not fetch checkFile.py !')
            self.msg.info('no POOL file post-processing')
            return
        checkfile = os.path.realpath(checkfile)

        def run_check_file(pool_file, dbfile, app=checkfile):
            # run checkFile.py silently; returns the process exit code
            from subprocess import call
            with open('/dev/null', 'w') as dev_null:
                res = call([app, "-f", pool_file, "-o", dbfile, "--fast"],
                           stdout=dev_null,
                           stderr=dev_null)
            return res

        _msg = ShutUp()
        self.msg.info("Finalizing [%s]", self.name)
        ## FIXME: move to 'with' stmt when py2.5 is there for proper RAII
        ##        and leaner code...
        self.inputPoolFiles = [f for f in self.inputPoolFiles]
        if len(self.inputPoolFiles) > 0:
            self.msg.info("Content of input POOL files:")
            _msg.mute()
            _msg.unMute()
            # only the first 20 input files are inspected
            for i, inFileName in enumerate(self.inputPoolFiles[:20]):
                try:
                    _msg.mute()
                    sc = run_check_file(pool_file=inFileName, dbfile=tmpfile)
                    if sc != 0:
                        raise RuntimeError \
                              ('error running check file (rc=%r)'%sc)
                    inFile = PF.PoolFile(tmpfile)
                    _msg.unMute()
                    self.svc.meta['inputPoolFiles/%i' % i] = {
                        'infos': inFile._fileInfos,
                        'nbrEvts': inFile.dataHeader.nEntries,
                        'dataHeader': [_unfold(inFile.dataHeader)],
                        'data': [_unfold(p) for p in inFile.data]
                    }
                    self.msg.info(" - [%s] %8.3f kb (%i events)",
                                  inFile._fileInfos['name'],
                                  inFile._fileInfos['size'] / Units.kB,
                                  inFile.dataHeader.nEntries)
                    del inFile
                except Exception, err:
                    _msg.unMute()
                    self.msg.info("Could not run checkFile on [%s] !!",
                                  inFileName)
                    self.msg.info("Reason: %s", err)
                    if 'inFile' in dir(): del inFile
                _msg.unMute()
Esempio n. 14
0
    if options.fileName == None and len(fileNames) == 0:
        # no input file given at all: show the usage and bail out
        str(parser.print_help() or "")
        sys.exit(1)

    if options.fileName != None:
        # expand ~user and $VAR constructs in the given path
        fileName = os.path.expandvars(os.path.expanduser(options.fileName))
        fileNames.append(fileName)

    # de-duplicate the input file names
    fileNames = set(fileNames)
    sc = 0
    for fileName in fileNames:
        try:
            import PyUtils.PoolFile as PF
            PF.PoolOpts.FAST_MODE = options.fastMode
            PF.PoolOpts.SUPER_DETAILED_BRANCH_SZ = options.super_detailed_branch_sz
            poolFile = PF.PoolFile(fileName)
            poolFile.checkFile(sorting=options.sortFctName)
            if options.doDetailedDump:
                dumpFile = os.path.basename(fileName) + ".txt"
                print "## dumping details into [%s]" % dumpFile
                poolFile.detailedDump(dumpFile)
            if options.outFileName:
                outFileName = options.outFileName
                print "## saving checkFile report into [%s]..." % outFileName
                poolFile.saveReport(outFileName)
        except Exception, e:
            # report the failure but keep going with the remaining files
            print "## Caught exception [%s] !!" % str(e.__class__)
            print "## What:", e
            print sys.exc_info()[0]
            print sys.exc_info()[1]
            sc = 1
Esempio n. 15
0
def _setup():
    """Peek into the job's input file(s) and fill the module-level global
    ``inputFileSummary`` dict with their meta-data (file type, event type,
    stream names, item lists, conditions tag, ...).

    For online running a minimal hard-coded summary is used.  When the
    input is a TAG file, the summary is re-built from the first available
    file the TAG redirects to and ``inputFileSummary['TagStreamsRef']``
    is set to the original TAG file.
    """

    global inputFileSummary
    import os
    from RecExConfig.RecFlags import rec
    import AthenaCommon.Logging as L
    from AthenaCommon.Resilience import treatException

    #define a logger
    msg = L.logging.getLogger('inputFilePeeker' )
    msg.info("Executing   inputFilePeeker.py")

    # special setup for online reconstruction so far
    from AthenaCommon.AthenaCommonFlags import athenaCommonFlags
    if athenaCommonFlags.isOnline():
        # set minimal items of inputFileSummary
        inputFileSummary={'file_type':'bs',
                          'evt_type':['IS_DATA','IS_ATLAS','IS_PHYSICS'],
                          'TagStreamsRef':''}
        return

    #get input file name
    from RecExConfig.RecoFunctions import InputFileNames
    inFiles=InputFileNames()
    if len(inFiles) < 1:
        msg.error("No input files specified yet! Cannot do anything.")

    #create and fill inputFileSummary (DC: looping through input files if necessary)
    import PyUtils.AthFile as athFile
    failed_trials = 0
    for inFile in inFiles:
        try:
            fi = athFile.fopen(inFile)
            inputFileSummary = fi.fileinfos
        except Exception as err:
            msg.warning("Unable to open file [%s]"%inFile)
            msg.warning('caught:\n%s',err)
            import traceback
            traceback.print_exc()
            continue

        ## Making sure that stream_names is always defined
        if 'stream_names' not in inputFileSummary:
            msg.warning("AthFile didn't find key 'stream_names'. Recovering it but that's unexpected.")
            inputFileSummary['stream_names']=[]

        #First try to catch the no entries case
        if inputFileSummary['stream_names'] == []:
            try:
                inputFileSummary['stream_names'] = [fi.infos['metadata_items'][0][1]]
            except Exception as err:
                msg.info("Unable to find stream names in file metadata.")

        #If stream_names still not found, check for bytestream case or give default value
        if inputFileSummary['stream_names']==None or inputFileSummary['stream_names']==[]:
            if inputFileSummary['file_type']=='bs':
                msg.info("stream_names not present in input bytestream file. Giving default name 'StreamRAW'")
                inputFileSummary['stream_names']=['StreamRAW']
            else:
                inputFileSummary['stream_names']=['Unknw']
                msg.warning("Unable to find stream_name from input file. This HAS an effect on auto-configuration!")

        #At this point streamsName is always setup

        #DR TAG do not have run number
        if len(inputFileSummary['run_number']) >0 or 'TAG' in inputFileSummary['stream_names'] :
            msg.info("Successfully filled inputFileSummary from file %s"%inFile)
            break
        else:
            msg.warning("Unable to fill inputFileSummary from file %s. File is probably empty. Will try again with next (if any)."%inFile)
            ## this file failed to produce a usable summary
            failed_trials += 1
        ## now, we failed too many times.
        ## trigger an athfile cache-flush to not waste too much memory
        ## with file summaries which are irrelevant.
        ## FIXME: should the trigger be jobo-settable ?
        if failed_trials > 10:
            msg.warning("Unable to fill inputFileSummary [%d] times. flushing athfile cache..." % failed_trials)
            athFile.flush_cache()
        pass

    #consistency check
    if len(inputFileSummary.items()) < 1:
        msg.error("Unable to build inputFileSummary from any of the specified input files. There is probably a problem.")
        return

    #Exception: if input is TAG, you need to follow the link to fill inputFileSummary
    msg.info("Extracted streams %s from input file " % inputFileSummary['stream_names'] )


    inputFileSummary['TagStreamsRef']=None

    if 'TAG' in inputFileSummary['stream_names']:

        import PyUtils.PoolFile as pf
        tagStreamsRef=pf.extract_streams_from_tag(inFile,nentries=1)

        from RecExConfig.AutoConfiguration import GetDefaultTagRefStream
        streamTarget=GetDefaultTagRefStream(tagStreamsRef)
        msg.info ( "will redirect to target %s " % streamTarget )

        # now get the file on which the TAG is pointing
        from PyUtils.PoolFile import PoolFileCatalog as pfc
        newInFile=None
        catalog_name = [pfc.DefaultCatalog]
        # ensure consistency of (read) catalogs' content b/w
        # python PoolFileCatalog and C++ PoolSvc.ReadCatalog
        try:
            from AthenaCommon.AppMgr import ServiceMgr as svcMgr
            if (hasattr(svcMgr, 'PoolSvc') and
                hasattr(svcMgr.PoolSvc, 'ReadCatalog')):
                # add PoolSvc.ReadCatalog, not overwrite, otherwise
                # default PoolFileCatalog would be removed
                catalog_name += list(svcMgr.PoolSvc.ReadCatalog[:])
                pass
        except Exception as err:
            msg.info(
                'problem getting ReadCatalog value from svcMgr.PoolSvc:\n%s',
                err)
            pass
        try:
            #get guid of file to be navigated to, then get corresponding physics file name
            aTagStreamsRef=tagStreamsRef[streamTarget][0]
            newInFile=pfc(catalog=catalog_name).pfn(aTagStreamsRef)
            msg.info ( "reading TAG redirected to file fid: %s pfn:%s " % (aTagStreamsRef,newInFile))
            try:
                fi = athFile.fopen(newInFile)
            except Exception:
                # BUGFIX: was `newInfile` (undefined name) -- raised a
                # NameError whenever this warning path was taken
                msg.warning ( "AthFile.fopen failed ! Could not redirect input TAG to first target file %s. Probably not available. Now trying them all." % newInFile )
                newInFile=None
        except Exception:
            msg.warning ( "could not redirect input TAG to first target file %s. Probably not in catalog. Now trying them all." % aTagStreamsRef )
            newInFile=None

        if newInFile is None:
            #get ALL guid of all files to be navigated to, then get corresponding physics file name of first available one
            allTagStreamsRef=pf.extract_streams_from_tag(inFile)[streamTarget]
            for aTagStreamsRef in allTagStreamsRef:
                try:
                    newInFile=pfc(catalog=catalog_name).pfn(aTagStreamsRef)
                    fi = athFile.fopen(newInFile)
                    msg.info ( "finally redirected input TAG to file fid: %s pfn:%s " % (aTagStreamsRef,newInFile))
                    break
                except Exception:

                    newInFile=None

        if newInFile is None:
            raise RuntimeError ("unable to redirect tag to any file. Autoconfiguration fails")
        else:
            inputFileSummary = fi.fileinfos
            # store information in inputFileSummary
            inputFileSummary['TagStreamsRef']=inFile


    #event data

    from RecExConfig.RecoFunctions import ListOfTupleToDic
    if 'eventdata_items' not in inputFileSummary:
        inputFileSummary['eventdata_items']=[]
        pass
    fullListTuple = inputFileSummary['eventdata_items']
    inputFileSummary['eventdata_itemsDic']=ListOfTupleToDic(fullListTuple)
    fullList = []
    if fullListTuple:
        for iTuple in fullListTuple :
            item = '%s#%s' % iTuple
            fullList.append(item)
            pass
    inputFileSummary['eventdata_itemsList']=fullList


    #meta-data
    if 'metadata_items' not in inputFileSummary:
        inputFileSummary['metadata_items']=[]
        pass
    fullListTuple = inputFileSummary['metadata_items']
    inputFileSummary['metadata_itemsDic']=ListOfTupleToDic(fullListTuple)
    fullList = []
    if fullListTuple:
        for iTuple in fullListTuple :
            item = '%s#%s' % iTuple
            fullList.append(item)
            pass
    inputFileSummary['metadata_itemsList']=fullList

    #Catch common problems
    if inputFileSummary['conditions_tag']==None:
        inputFileSummary['conditions_tag']=""

    if inputFileSummary['evt_type']==[] and inputFileSummary['file_type']=='bs':
        inputFileSummary['evt_type']=('IS_DATA', 'Unknown', 'Unknown')
        msg.warning('Bytestream input: guessing that evt_type=IS_DATA, but this is not 100% certain. Using auto-configuration is not safe if this info is wrong.')

    if inputFileSummary['evt_type']==[] and inputFileSummary['nentries']==0:
        conditionsTag=inputFileSummary['conditions_tag']
        if conditionsTag.find('SIM')>0:
            inputFileSummary['evt_type']=('IS_SIMULATION', 'IS_ATLAS', 'IS_PHYSICS')
        else:
            inputFileSummary['evt_type']=('IS_DATA', 'Unknown', 'Unknown')
            pass
        msg.warning("Input file has zero events and hence no EventInfo object. Guessed that evt_type=%s, but this is not certain. Using auto-configuration is not safe if this info is wrong."%(inputFileSummary['evt_type'][0]))
        pass

    #Final print out (DEBUG)
    msg.debug("inputFileSummary is:")
    msg.debug(str(inputFileSummary))
    return
Esempio n. 16
0
class PoolMonTool(object):
    """
    Persistency monitoring tool: measures memory and disk sizes of input and
    output containers (in POOL files)

    File discovery happens once, at construction time: input files come from
    the EventSelector's InputCollections, output files from every configured
    AthenaOutputStream.  The measurements themselves are done in finalize()
    by shelling out to the external ``checkFile.py`` script and reading back
    its dump with PyUtils.PoolFile.
    """
    def __init__(self, svc):
        # `svc` is the owning monitoring service: its `name` seeds this
        # tool's logger name and its `meta` dict receives the per-file
        # summaries collected in finalize().
        super(PoolMonTool, self).__init__()
        self.svc = svc
        self.name = svc.name + ".Pool"
        from AthenaCommon.AppMgr import ServiceMgr as svcMgr
        # Gather the de-duplicated set of input files, if an EventSelector
        # with an InputCollections property is configured.
        inFiles = set()
        if hasattr(svcMgr,'EventSelector') and \
           hasattr(svcMgr.EventSelector, 'InputCollections') :
            for inFile in svcMgr.EventSelector.InputCollections:
                # Drop the "ROOTTREE:" technology prefix to keep plain names.
                if inFile.startswith("ROOTTREE:"):
                    inFile = inFile[len("ROOTTREE:"):]
                inFiles.add(inFile)
        # Gather output files from every configured AthenaOutputStream;
        # streams without an "OutputFile" property are skipped.
        outFiles = set()
        from AthenaCommon import CfgMgr
        from AthenaCommon.Configurable import Configurable
        for c in Configurable.allConfigurables.values():
            if not isinstance(c, CfgMgr.AthenaOutputStream): continue
            try:
                outFile = c.properties()["OutputFile"]
            except KeyError:
                continue
            if outFile.startswith("ROOTTREE:"):
                outFile = outFile[len("ROOTTREE:"):]
            outFiles.add(outFile)

        # Freeze the sets into lists; iteration order is whatever the set
        # yields, so no particular ordering is guaranteed.
        self.inputPoolFiles = [i for i in inFiles]
        self.outputPoolFiles = [o for o in outFiles]

    @property
    def msg(self):
        """Logger named after this tool (looked up on each access)."""
        import AthenaCommon.Logging as L
        return L.logging.getLogger(self.name)

    def initialize(self):
        """Log the input/output file lists discovered at construction."""
        self.msg.info("Initializing [%s]", self.name)
        self.msg.info("InputPoolFiles:  %r", self.inputPoolFiles)
        self.msg.info("OutputPoolFiles: %r", self.outputPoolFiles)
        return

    def finalize(self):
        """Inspect the content of the input and output POOL files.

        For each file: run ``checkFile.py`` to dump the file description
        into a temporary ``*.py_shelve.dat`` file, load that back with
        PyUtils.PoolFile.PoolFile, and store the per-container sizes into
        ``self.svc.meta`` under ``inputPoolFiles/<i>`` /
        ``outputPoolFiles/<i>``.  Failures on a given file are reported at
        info level and processing continues with the next file.
        """
        import sys
        import PyUtils.PoolFile as PF

        # Helper to temporarily silence output written directly to the
        # stdout *file descriptor* (which a sys.stdout reassignment would
        # not catch): duplicate fd 1, point it at /dev/null while muted,
        # and restore the saved copy on unMute().
        # NOTE: `os` inside these methods resolves (via closure) to the
        # module imported further down in finalize()'s body; that import
        # runs before ShutUp is instantiated, so the lookup succeeds.
        class ShutUp:
            def __init__(self):
                self.save = os.dup(sys.stdout.fileno())
                self.quiet = open("/dev/null", "w")
                return

            def mute(self):
                os.dup2(self.quiet.fileno(), sys.stdout.fileno())
                return

            def unMute(self):
                os.dup2(self.save, sys.stdout.fileno())
                return

        # Flatten one PoolFile record into a plain dict for storage in meta.
        def _unfold(pr):
            return {
                'name': pr.name,
                'memSize': pr.memSize,
                'diskSize': pr.diskSize,
                'memSizeNoZip': pr.memSizeNoZip,
                'nEntries': pr.nEntries,
                'dirType': pr.dirType,
                'details': pr.details,
            }

        # Scratch file for checkFile.py's output; the fd is closed right
        # away (only the name is needed) and the file is removed at
        # interpreter exit.
        import tempfile, atexit, os
        tmpfileno, tmpfile = tempfile.mkstemp(suffix='.py_shelve.dat')
        os.close(tmpfileno)
        atexit.register(os.unlink, tmpfile)

        # Locate the external checkFile.py script on the PATH; without it
        # no post-processing is possible, so bail out (best-effort).
        # NOTE: the py2-only `commands` module is used here on purpose.
        import commands
        sc, checkfile = commands.getstatusoutput('which checkFile.py')
        if sc != 0:
            self.msg.info('could not fetch checkFile.py !')
            self.msg.info('no POOL file post-processing')
            return
        checkfile = os.path.realpath(checkfile)

        def run_check_file(pool_file, dbfile, app=checkfile):
            # Run checkFile.py quietly: stdout/stderr are discarded and only
            # the process return code is propagated to the caller.
            from subprocess import call
            with open('/dev/null', 'w') as dev_null:
                res = call([app, "-f", pool_file, "-o", dbfile, "--fast"],
                           stdout=dev_null,
                           stderr=dev_null)
            return res

        _msg = ShutUp()
        self.msg.info("Finalizing [%s]", self.name)
        ## FIXME: move to 'with' stmt when py2.5 is there for proper RAII
        ##        and leaner code...
        # Re-materialize as a plain list (defensive copy).
        self.inputPoolFiles = [f for f in self.inputPoolFiles]
        if len(self.inputPoolFiles) > 0:
            self.msg.info("Content of input POOL files:")
            # NOTE(review): this immediate mute/unMute pair is effectively a
            # no-op -- looks vestigial.
            _msg.mute()
            _msg.unMute()
            # Only the first 20 input files are inspected (hard-coded cap).
            for i, inFileName in enumerate(self.inputPoolFiles[:20]):
                try:
                    # Mute fd-level output while checkFile.py / PoolFile run.
                    _msg.mute()
                    sc = run_check_file(pool_file=inFileName, dbfile=tmpfile)
                    if sc != 0:
                        raise RuntimeError \
                              ('error running check file (rc=%r)'%sc)
                    inFile = PF.PoolFile(tmpfile)
                    _msg.unMute()
                    # Record the full per-container breakdown in the
                    # service's metadata store.
                    self.svc.meta['inputPoolFiles/%i' % i] = {
                        'infos': inFile._fileInfos,
                        'nbrEvts': inFile.dataHeader.nEntries,
                        'dataHeader': [_unfold(inFile.dataHeader)],
                        'data': [_unfold(p) for p in inFile.data]
                    }
                    self.msg.info(" - [%s] %8.3f kb (%i events)",
                                  inFile._fileInfos['name'],
                                  inFile._fileInfos['size'] / Units.kB,
                                  inFile.dataHeader.nEntries)
                    del inFile
                except Exception, err:
                    # Best effort: report and move on to the next file.
                    # (Python 2 `except E, name` syntax kept on purpose.)
                    _msg.unMute()
                    self.msg.info("Could not run checkFile on [%s] !!",
                                  inFileName)
                    self.msg.info("Reason: %s", err)
                    # Drop a possibly half-constructed PoolFile object.
                    if 'inFile' in dir(): del inFile
                _msg.unMute()
        if len(self.outputPoolFiles) > 0:
            self.msg.info("Content of output POOL files:")
            # Same procedure as for the inputs, but with no cap on the
            # number of files.
            for i, outFileName in enumerate(self.outputPoolFiles):
                try:
                    _msg.mute()
                    sc = run_check_file(pool_file=outFileName, dbfile=tmpfile)
                    if sc != 0:
                        raise RuntimeError \
                              ('error running check file (rc=%r)'%sc)

                    outFile = PF.PoolFile(tmpfile)
                    _msg.unMute()
                    self.svc.meta['outputPoolFiles/%i' % i] = {
                        'infos': outFile._fileInfos,
                        'nbrEvts': outFile.dataHeader.nEntries,
                        'dataHeader': [_unfold(outFile.dataHeader)],
                        'data': [_unfold(p) for p in outFile.data]
                    }
                    self.msg.info(" - [%s] %8.3f kb (%i events)",
                                  outFile._fileInfos['name'],
                                  outFile._fileInfos['size'] / Units.kB,
                                  outFile.dataHeader.nEntries)
                    del outFile
                except Exception, err:
                    _msg.unMute()
                    self.msg.info("Could not run checkFile on [%s] !!",
                                  outFileName)
                    self.msg.info("Reason: %s", err)
                    if 'outFile' in dir(): del outFile
                _msg.unMute()
Esempio n. 17
0
    #consistency check
    if len(inputFileSummary.items()) < 1:
        msg.error("Unable to build inputFileSummary from any of the specified input files. There is probably a problem.")
        return

    #Exception: if input is TAG, you need to follow the link to fill inputFileSummary
    msg.info("Extracted streams %s from input file " % inputFileSummary['stream_names'] )   


    inputFileSummary['TagStreamsRef']=None

    if 'TAG' in inputFileSummary['stream_names']:

        import PyUtils.PoolFile as pf
        tagStreamsRef=pf.extract_streams_from_tag(inFile,nentries=1)

        from RecExConfig.AutoConfiguration import GetDefaultTagRefStream
        streamTarget=GetDefaultTagRefStream(tagStreamsRef)
        msg.info ( "will redirect to target %s " % streamTarget )

        # now get the file on which the TAG is pointing
        from PyUtils.PoolFile import PoolFileCatalog as pfc
        newInFile=None
        catalog_name = [pfc.DefaultCatalog]
        # ensure consistency of (read) catalogs' content b/w
        # python PoolFileCatalog and C++ PoolSvc.ReadCatalog
        try:
            from AthenaCommon.AppMgr import ServiceMgr as svcMgr
            if (hasattr(svcMgr, 'PoolSvc') and
                hasattr(svcMgr.PoolSvc, 'ReadCatalog')):