def _guess_file_type(fname, msg):
    """guess the type of an input file (bs,rdo,esd,aod,...)"""
    input_type = None
    import PyUtils.AthFile as af
    try:
        file_type, file_name = af.ftype(fname)
    except Exception:
        raise  # for now
    if file_type == 'bs':
        input_type = 'bs'
    elif file_type == 'pool':
        import PyUtils.PoolFile as pf
        stream_names = pf.extract_stream_names(fname)
        stream_names = [s.lower() for s in stream_names]
        if len(stream_names) > 1:
            msg.warning('got many stream names: %r', stream_names)
            msg.warning('only considering the 1st one...')
        elif len(stream_names) <= 0:
            msg.warning('got an empty list of stream names')
            raise SystemExit(1)
        stream_name = stream_names[0]
        input_type = {
            'stream1':   'rdo',
            'streamesd': 'esd',
            'streamaod': 'aod',
            # FIXME: TODO: TAG, DPD
        }.get(stream_name, 'aod')
    else:
        msg.error('unknown file type (%s) for file [%s]',
                  file_type, file_name)
    return input_type
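# Hedged usage sketch for _guess_file_type: the file name is a placeholder,
# and the call only succeeds where PyUtils (an Athena release) is importable.
if __name__ == '__main__':
    import logging
    logging.basicConfig()
    _msg = logging.getLogger('guess-file-type')
    # expected result is one of 'bs', 'rdo', 'esd', 'aod'
    print(_guess_file_type('events.pool.root', _msg))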
def getInputFileToNumEventMapping(m_list):
    """Returns a dictionary of the file names and the number of events in each file"""
    print("|====================================================================================================================|")
    print("|                                 Doing the mapping from file to number of events                                   |")
    print("|====================================================================================================================|")
    m_numEventsPerFile = {}
    import PyUtils.PoolFile as PF
    failedFiles = [
        "/castor/cern.ch/grid/atlas/caf/atlcal/perm/id/cosmics/ESDs/InDetESD_91890_lb7.root",
        "/castor/cern.ch/grid/atlas/caf/atlcal/perm/id/cosmics/ESDs/InDetESD_91891_lb13.root",
        "/castor/cern.ch/grid/atlas/caf/atlcal/perm/id/cosmics/ESDs/InDetESD_91891_lb24.root"
    ]
    for fname in m_list:
        if fname not in failedFiles:
            poolFile = PF.PoolFile(fname)
            m_numEventsPerFile[fname] = int(poolFile.fileInfos().split()[6])
    print(m_numEventsPerFile)
    return m_numEventsPerFile
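# Hedged example: build the mapping and total up the events. The two ESD
# file names are placeholders and must point at readable POOL files.
m_mapping = getInputFileToNumEventMapping(
    ["InDetESD_1.root", "InDetESD_2.root"])
print("total events:", sum(m_mapping.values()))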
def main(args): """diff two POOL files (containers and sizes)""" import os.path as osp old = osp.expandvars(osp.expanduser(args.old)) new = osp.expandvars(osp.expanduser(args.new)) import PyUtils.PoolFile as PF diff = PF.DiffFiles(refFileName=old, chkFileName=new, verbose=args.verbose) diff.printSummary() return diff.status()
def diff_pool(self, file_name, ref_file):
    """Compare a POOL file against a reference file, ignoring the RecoTiming containers."""
    import PyUtils.PoolFile as PF
    # diff-pool
    df = PF.DiffFiles(refFileName=ref_file,
                      chkFileName=file_name,
                      ignoreList=['RecoTimingObj_p1_RAWtoESD_timings',
                                  'RecoTimingObj_p1_ESDtoAOD_timings'])
    df.printSummary()
    stat = df.status()
    print(stat)
    del df
    return stat
def _retrieve_items_from_input():
    from AthenaCommon.AppMgr import ServiceMgr as svcMgr
    import PyUtils.PoolFile as _pf
    # only inspect the first input file:
    # that should be enough as we don't really support varying schema
    # shapes anymore (as ROOT doesn't)
    items = _pf.extract_items(svcMgr.EventSelector.InputCollections[0])
    input_items = []
    for item in items:
        clid = _clid_from_string(item[0])
        if clid is None:
            msg.warning('could not infer clid for: "%s"', item[0])
            clid = item[0]  # put back the original string, then.
        input_items.append([clid, item[1]])
    return input_items
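# Hedged sketch of consuming the result above (requires a configured Athena
# job). From the loop, extract_items evidently yields (type-name, StoreGate
# key) pairs, so each returned entry is [clid-or-type-name, key]; the
# printed labels are illustrative only.
for clid, key in _retrieve_items_from_input():
    print('input item: clid=%s key=%s' % (clid, key))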
def diffPoolFiles(ref, chk, details,
                  toIgnore=['RecoTimingObj_p1_RAWtoESD_timings',
                            'RecoTimingObj_p1_ESDtoAOD_timings']):
    import PyUtils.PoolFile as PF
    try:
        df = PF.DiffFiles(refFileName=ref, chkFileName=chk,
                          ignoreList=toIgnore)
        if details is None:
            df.printSummary()
        else:
            df.printSummary(details)
        stat = df.status()
        del df
    except Exception:
        print("Exception caught while diff'ing POOL files")
        stat = True
    return stat
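# Hedged usage note: elsewhere in this collection the DiffFiles status is
# passed straight to sys.exit, so a typical caller looks like this (the file
# names are placeholders):
import sys
sys.exit(diffPoolFiles('ref.pool.root', 'chk.pool.root', details=None))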
def main(args): """read a POOL file and dump its content. """ files = args.files if isinstance(files, basestring): files = [files] import sys import os import os.path as osp for i, f in enumerate(files): files[i] = osp.expandvars(osp.expanduser(f)) exitcode = 0 for fname in files: try: import PyUtils.PoolFile as PF PF.PoolOpts.FAST_MODE = args.fast pool_file = PF.PoolFile(fname) pool_file.checkFile(sorting=args.sort_fct) if args.detailed_dump: dump_file = osp.basename(fname) + '.txt' print "## dumping details into [%s]" % (dump_file, ) pool_file.detailedDump(dump_file) if args.output: oname = args.output print "## saving report into [%s]..." % (oname, ) pool_file.saveReport(oname) except Exception, e: print "## Caught exception [%s] !!" % str(e.__class__) print "## What:", e print sys.exc_info()[0] print sys.exc_info()[1] exitcode = 1 pass except:
action="store_true", dest="strict", default=False, help="Compare both memSize and diskSize") (options, args) = parser.parse_args() if len(args) > 0 and args[0][0] != "-": options.refFileName = args[0] pass if len(args) > 1 and args[1][0] != "-": options.fileName = args[1] pass if options.fileName is None or options.refFileName is None: str(parser.print_help() or "") sys.exit(1) pass chkFileName = os.path.expandvars(os.path.expanduser(options.fileName)) refFileName = os.path.expandvars(os.path.expanduser(options.refFileName)) import PyUtils.PoolFile as PF diff = PF.DiffFiles(refFileName=refFileName, chkFileName=chkFileName, verbose=options.verbose, strict=options.strict) diff.printSummary() sys.exit(diff.status())
    pass
fileNames = set(fileNames)

# needed below for the Aux-store branch-name matching:
import re

# Check the consistency with the CSV output:
if len(fileNames) > 1 and options.csvFileName:
    print("WARNING CSV output is only available when processing a single "
          "input file")

# Loop over the specified file(s):
for fileName in fileNames:

    # Open the file:
    import PyUtils.PoolFile as PF
    poolFile = PF.PoolFile(fileName)

    # Loop over all the branches of the file, and sum up the information
    # about them in a smart way...
    summedData = {}
    categData = {}
    categTrigData = {}
    categTrigDynVars = {}
    for d in poolFile.data:
        # Skip metadata/TAG/etc. branches:
        # if d.dirType != "B": continue
        # The name of this branch:
        brName = d.name
        # Check if this is a static auxiliary store:
        m = re.match(r"(.*)Aux\..*", d.name)
        if m:
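# Aside: a minimal, runnable illustration of what the branch-name match
# above captures (the container name is made up):
import re
m = re.match(r"(.*)Aux\..*", "ElectronsAux.pt")
assert m is not None and m.group(1) == "Electrons"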
def __init__(self, TOTALCPUS, LOCALDIR, FILELIST, OutputLevel,
             doDetailedSplitting=False, nEventsPerFile=-1):

    def sort_by_value(d):
        """Returns the keys of dictionary d sorted by their values"""
        items = d.items()
        backitems = [[v[1], v[0]] for v in items]
        backitems.sort()
        backitems.reverse()
        return [backitems[i][1] for i in range(0, len(backitems))]

    self.doDetailedSplitting = doDetailedSplitting
    self.OutputLevel = OutputLevel
    self.Files = FILELIST
    self.LocalDir = LOCALDIR
    self.nSubJobs = TOTALCPUS
    self.totalNumberOfEvents = 0
    self.totalNumberOfFiles = 0
    self.nEventsPerFile = nEventsPerFile
    inputFileList = []

    if not self.doDetailedSplitting:
        inputfiles = open(FILELIST, "r")
        filelist = inputfiles.read().split('\n')
        for i in range(0, len(filelist)):
            filelist[i] = filelist[i].rstrip()
        inputfiles.close()

        if not LOCALDIR:
            print("Reading Custom File")
            FinalListSorted = []
            for line in filelist:
                if line and line[0] != '#':
                    FinalListSorted.append(line)
            #print (FinalListSorted)
        elif "castor" in LOCALDIR:
            print("Reading castor directory " + LOCALDIR + " Please wait...")
            extendedFileList = os.popen("rfdir " + LOCALDIR[7:]).read().splitlines()
        else:
            print("Reading directory " + LOCALDIR + " Please wait...")
            extendedFileList = os.popen("ls -l " + LOCALDIR).read().splitlines()

        if LOCALDIR:
            i = 0
            SizeList = {}
            for line in extendedFileList:
                curr = line.split()
                if curr[0] != 'total':
                    #print (curr[8], " ", curr[4])
                    SizeList[i] = {}
                    SizeList[i][0] = curr[8].rstrip()
                    SizeList[i][1] = curr[4].rstrip()
                    i = i + 1
            FinalList = {}
            for i in range(0, len(SizeList)):
                #print (SizeList[i][0])
                if SizeList[i][0] in filelist:
                    #print ("Accepted")
                    #print (SizeList[i][0], " size:", SizeList[i][1])
                    FinalList[SizeList[i][0]] = int(SizeList[i][1])
            FinalListSorted = sort_by_value(FinalList)
            #print ("Sorted list")
            #for i in range(0,len(FinalListSorted)):
            #    print (FinalListSorted[i], "\tsize:\t", FinalList[FinalListSorted[i]])

        currCPU = 0
        self.CPUsFiles = {}
        nFiles = len(FinalListSorted)
        nRemainder = nFiles % TOTALCPUS
        nFilesPerCpu = nFiles // TOTALCPUS  # integer division
        nFilesForThisCpu = 1
        sumFileSize = 0
        if len(FinalListSorted[0].split()) == 2:
            for i in range(0, nFiles):
                sumFileSize += int(FinalListSorted[i].split()[1])
        print(sumFileSize)
        averageSizePerCpu = sumFileSize / TOTALCPUS * 0.97
        print(averageSizePerCpu)
        sumSizeOnCpu = 0
        #print ("NFile, Remainder, NfilesperCpu ", nFiles, " ", nRemainder, " ", nFilesPerCpu)

        # If the file size is present then use it to split the files
        if sumFileSize != 0:
            for i in range(0, nFiles):
                if currCPU in self.CPUsFiles:
                    self.CPUsFiles[currCPU].append(
                        LOCALDIR + FinalListSorted[i].split()[0])
                else:
                    self.CPUsFiles[currCPU] = [
                        LOCALDIR + FinalListSorted[i].split()[0]]
                extraFiles = 0
                sumSizeOnCpu += int(FinalListSorted[i].split()[1])
                if (sumSizeOnCpu > averageSizePerCpu and i < nFiles - 1
                        and (sumSizeOnCpu + int(FinalListSorted[i + 1].split()[1])) > averageSizePerCpu * 1.04):
                    print("File size on CPU: ", currCPU, '\t', sumSizeOnCpu)
                    currCPU = currCPU + 1
                    if currCPU >= TOTALCPUS:
                        currCPU = TOTALCPUS - 1
                    else:
                        sumSizeOnCpu = 0
                elif (nFiles - i == TOTALCPUS - currCPU):
                    currCPU = currCPU + 1
                    print("File size on CPU: ", currCPU, '\t', sumSizeOnCpu)
        else:
            for i in range(0, nFiles):
                #print (FinalListSorted[i], "CPU: ", currCPU, " FPCPU: ", nFilesForThisCpu)
                if currCPU in self.CPUsFiles:
                    self.CPUsFiles[currCPU].append(
                        LOCALDIR + FinalListSorted[i].split()[0])
                else:
                    self.CPUsFiles[currCPU] = [
                        LOCALDIR + FinalListSorted[i].split()[0]]
                extraFiles = 0
                if (currCPU < nRemainder):
                    extraFiles = 1
                if (nFilesForThisCpu < nFilesPerCpu + extraFiles):
                    nFilesForThisCpu = nFilesForThisCpu + 1
                else:
                    currCPU = currCPU + 1
                    nFilesForThisCpu = 1

    # Doing the detailed splitting
    else:
        import PyUtils.PoolFile as PF

        # Getting the number of events in each file.
        inputfiles = open(FILELIST, "r")
        numEventsPerFile = {}
        print("==================================================")
        print("The input files are: (May take some time..)")
        for line in inputfiles:
            if line.rstrip().find(".root") > 0:
                fullFileName = self.LocalDir + "/" + line.rstrip()
                inputFileList.append(fullFileName)
                poolFile = PF.PoolFile(fullFileName)
                thisNumEvents = int(poolFile.dataHeader.nEntries)
                self.totalNumberOfEvents += thisNumEvents
                self.totalNumberOfFiles += 1
                print(fullFileName, " with ", thisNumEvents, " events")
                numEventsPerFile[fullFileName] = thisNumEvents
        print("==================================================")

        # Getting the number of events to process, to skip, and the input
        # file. The relevant quantities for each subJob:
        self.m_skipEvents = {}
        self.nEvents = 0
        self.m_inputFiles = {}

        # This means we will do all the events
        if self.nEventsPerFile == -1:
            self.nEvents = int(self.totalNumberOfEvents / self.nSubJobs)
        else:
            self.nEvents = self.nEventsPerFile

        # local counters
        m_usedFiles = 0
        m_numberEventsUsed = 0
        for subJob in range(self.nSubJobs):
            self.m_inputFiles[subJob] = []
            m_eventsNeeded = self.nEvents
            while (m_eventsNeeded != 0
                   and m_usedFiles < self.totalNumberOfFiles):
                # Two cases: the file indexed by m_usedFiles has enough
                # events to complete the events needed, or it doesn't.
                # If it does:
                if m_eventsNeeded <= numEventsPerFile[inputFileList[m_usedFiles]]:
                    numEventsPerFile[inputFileList[m_usedFiles]] -= m_eventsNeeded
                    self.m_inputFiles[subJob].append(inputFileList[m_usedFiles])
                    self.m_skipEvents[subJob] = m_numberEventsUsed
                    m_numberEventsUsed += m_eventsNeeded
                    m_eventsNeeded = 0
                    print("self.m_skipEvents[" + str(subJob) + "]",
                          self.m_skipEvents[subJob])
                    print("m_numberEventsUsed", m_numberEventsUsed)
                # If it doesn't:
                else:
                    m_eventsNeeded -= numEventsPerFile[inputFileList[m_usedFiles]]
                    self.m_skipEvents[subJob] = m_numberEventsUsed
                    self.m_inputFiles[subJob].append(inputFileList[m_usedFiles])
                    m_usedFiles += 1
                    m_numberEventsUsed = 0
for Path in paths:
    if "castor" in Path:
        print("Reading castor directory. Please wait...")
        inputfiles = os.popen("rfdir " + Path).read().splitlines()
    else:
        print("Reading directory. Please wait...")
        inputfiles = os.popen("ls -l " + Path).read().splitlines()
    for line in inputfiles:
        filename = line.split()[8]
        if "root" in filename or "ESD" in filename or "data" in filename:
            fullFilename = Path + filename
            try:
                if ReadNEvents:
                    poolFile = PF.PoolFile(fullFilename)
                    numEvents = int(poolFile.dataHeader.nEntries)
                    print(filename)
                    print(numEvents)
                    if numEvents > EventCut:
                        outputText.write(fullFilename + '\t' + str(numEvents) + '\n')
                    else:
                        print("File with few events, skipping...")
                else:
                    if not PoolFileCatalog:
                        outputText.write(fullFilename + '\n')
                    if PoolFileCatalog:
                        print(" Creating as PoolFileToCatalogue Please wait...")
                        print(" pool_insertFileToCatalog ", fullFilename)
if options.fileName is None and len(fileNames) == 0:
    str(parser.print_help() or "")
    sys.exit(1)

if options.fileName is not None:
    fileName = os.path.expandvars(os.path.expanduser(options.fileName))
    fileNames.append(fileName)

fileNames = set(fileNames)

sc = 0
for fileName in fileNames:
    try:
        import PyUtils.PoolFile as PF
        PF.PoolOpts.FAST_MODE = options.fastMode
        PF.PoolOpts.SUPER_DETAILED_BRANCH_SZ = options.super_detailed_branch_sz
        poolFile = PF.PoolFile(fileName)
        poolFile.checkFile(sorting=options.sortFctName)
        if options.doDetailedDump:
            dumpFile = os.path.basename(fileName) + ".txt"
            print("## dumping details into [%s]" % dumpFile)
            poolFile.detailedDump(dumpFile)
        if options.outFileName:
            outFileName = options.outFileName
            print("## saving checkFile report into [%s]..." % outFileName)
            poolFile.saveReport(outFileName)
    except Exception as e:
        print("## Caught exception [%s] !!" % str(e.__class__))
        print("## What:", e)
        print(sys.exc_info()[0])
        print(sys.exc_info()[1])
        sc = 1
def _setup():
    global inputFileSummary
    import os
    from RecExConfig.RecFlags import rec
    import AthenaCommon.Logging as L
    from AthenaCommon.Resilience import treatException

    # define a logger
    msg = L.logging.getLogger('inputFilePeeker')
    msg.info("Executing inputFilePeeker.py")

    # special setup for online reconstruction so far
    from AthenaCommon.AthenaCommonFlags import athenaCommonFlags
    if athenaCommonFlags.isOnline():
        # set minimal items of inputFileSummary
        inputFileSummary = {'file_type': 'bs',
                            'evt_type': ['IS_DATA', 'IS_ATLAS', 'IS_PHYSICS'],
                            'TagStreamsRef': ''}
        return

    # get input file name
    from RecExConfig.RecoFunctions import InputFileNames
    inFiles = InputFileNames()
    if len(inFiles) < 1:
        msg.error("No input files specified yet! Cannot do anything.")

    # create and fill inputFileSummary (DC: looping through input files if necessary)
    import PyUtils.AthFile as athFile
    failed_trials = 0
    for inFile in inFiles:
        try:
            fi = athFile.fopen(inFile)
            inputFileSummary = fi.fileinfos
        except Exception as err:
            msg.warning("Unable to open file [%s]" % inFile)
            msg.warning('caught:\n%s', err)
            import traceback
            traceback.print_exc()
            continue

        ## Making sure that stream_names is always defined
        if 'stream_names' not in inputFileSummary:
            msg.warning("AthFile didn't find key 'stream_names'. Recovering it but that's unexpected.")
            inputFileSummary['stream_names'] = []

        # First try to catch the no-entries case
        if inputFileSummary['stream_names'] == []:
            try:
                inputFileSummary['stream_names'] = [fi.infos['metadata_items'][0][1]]
            except Exception as err:
                msg.info("Unable to find stream names in file metadata.")

        # If stream_names is still not found, check for the bytestream case
        # or give a default value
        if inputFileSummary['stream_names'] is None or inputFileSummary['stream_names'] == []:
            if inputFileSummary['file_type'] == 'bs':
                msg.info("stream_names not present in input bytestream file. Giving default name 'StreamRAW'")
                inputFileSummary['stream_names'] = ['StreamRAW']
            else:
                inputFileSummary['stream_names'] = ['Unknw']
                msg.warning("Unable to find stream_name from input file. This HAS an effect on auto-configuration!")

        # At this point stream_names is always set up
        # DR: TAGs do not have a run number
        if len(inputFileSummary['run_number']) > 0 or 'TAG' in inputFileSummary['stream_names']:
            msg.info("Successfully filled inputFileSummary from file %s" % inFile)
            break
        else:
            msg.warning("Unable to fill inputFileSummary from file %s. File is probably empty. Will try again with next (if any)." % inFile)

        ## everything failed...
        failed_trials += 1
        ## now, we failed too many times.
        ## trigger an athfile cache-flush to not waste too much memory
        ## with file summaries which are irrelevant.
        ## FIXME: should the trigger be jobo-settable?
        if failed_trials > 10:
            msg.warning("Unable to fill inputFileSummary [%d] times. flushing athfile cache..." % failed_trials)
            athFile.flush_cache()

    # consistency check
    if len(inputFileSummary.items()) < 1:
        msg.error("Unable to build inputFileSummary from any of the specified input files. There is probably a problem.")
        return

    # Exception: if input is TAG, you need to follow the link to fill inputFileSummary
    msg.info("Extracted streams %s from input file " % inputFileSummary['stream_names'])
    inputFileSummary['TagStreamsRef'] = None
    if 'TAG' in inputFileSummary['stream_names']:
        import PyUtils.PoolFile as pf
        tagStreamsRef = pf.extract_streams_from_tag(inFile, nentries=1)
        from RecExConfig.AutoConfiguration import GetDefaultTagRefStream
        streamTarget = GetDefaultTagRefStream(tagStreamsRef)
        msg.info("will redirect to target %s " % streamTarget)

        # now get the file the TAG is pointing at
        from PyUtils.PoolFile import PoolFileCatalog as pfc
        newInFile = None
        catalog_name = [pfc.DefaultCatalog]
        # ensure consistency of (read) catalogs' content b/w
        # python PoolFileCatalog and C++ PoolSvc.ReadCatalog
        try:
            from AthenaCommon.AppMgr import ServiceMgr as svcMgr
            if (hasattr(svcMgr, 'PoolSvc') and
                    hasattr(svcMgr.PoolSvc, 'ReadCatalog')):
                # add PoolSvc.ReadCatalog, do not overwrite, otherwise the
                # default PoolFileCatalog would be removed
                catalog_name += list(svcMgr.PoolSvc.ReadCatalog[:])
        except Exception as err:
            msg.info('problem getting ReadCatalog value from svcMgr.PoolSvc:\n%s', err)

        try:
            # get the guid of the file to be navigated to, then get the
            # corresponding physical file name
            aTagStreamsRef = tagStreamsRef[streamTarget][0]
            newInFile = pfc(catalog=catalog_name).pfn(aTagStreamsRef)
            msg.info("reading TAG redirected to file fid: %s pfn:%s" % (aTagStreamsRef, newInFile))
            try:
                fi = athFile.fopen(newInFile)
            except Exception:
                msg.warning("AthFile.fopen failed! Could not redirect input TAG to first target file %s. Probably not available. Now trying them all." % newInFile)
                newInFile = None
        except Exception:
            msg.warning("could not redirect input TAG to first target file %s. Probably not in catalog. Now trying them all." % aTagStreamsRef)
            newInFile = None

        if newInFile is None:
            # get ALL guids of the files to be navigated to, then get the
            # corresponding physical file name of the first available one
            allTagStreamsRef = pf.extract_streams_from_tag(inFile)[streamTarget]
            for aTagStreamsRef in allTagStreamsRef:
                try:
                    newInFile = pfc(catalog=catalog_name).pfn(aTagStreamsRef)
                    fi = athFile.fopen(newInFile)
                    msg.info("finally redirected input TAG to file fid: %s pfn:%s" % (aTagStreamsRef, newInFile))
                    break
                except Exception:
                    newInFile = None

        if newInFile is None:
            raise RuntimeError("unable to redirect tag to any file. Autoconfiguration fails")
        else:
            inputFileSummary = fi.fileinfos

        # store information in inputFileSummary
        inputFileSummary['TagStreamsRef'] = inFile

    # event data
    from RecExConfig.RecoFunctions import ListOfTupleToDic
    if 'eventdata_items' not in inputFileSummary:
        inputFileSummary['eventdata_items'] = []
    fullListTuple = inputFileSummary['eventdata_items']
    inputFileSummary['eventdata_itemsDic'] = ListOfTupleToDic(fullListTuple)
    fullList = []
    if fullListTuple:
        for iTuple in fullListTuple:
            item = '%s#%s' % iTuple
            fullList.append(item)
    inputFileSummary['eventdata_itemsList'] = fullList

    # meta-data
    if 'metadata_items' not in inputFileSummary:
        inputFileSummary['metadata_items'] = []
    fullListTuple = inputFileSummary['metadata_items']
    inputFileSummary['metadata_itemsDic'] = ListOfTupleToDic(fullListTuple)
    fullList = []
    if fullListTuple:
        for iTuple in fullListTuple:
            item = '%s#%s' % iTuple
            fullList.append(item)
    inputFileSummary['metadata_itemsList'] = fullList

    # Catch common problems
    if inputFileSummary['conditions_tag'] is None:
        inputFileSummary['conditions_tag'] = ""
    if inputFileSummary['evt_type'] == [] and inputFileSummary['file_type'] == 'bs':
        inputFileSummary['evt_type'] = ('IS_DATA', 'Unknown', 'Unknown')
        msg.warning('Bytestream input: guessing that evt_type=IS_DATA, but this is not 100% certain. Using auto-configuration is not safe if this info is wrong.')
    if inputFileSummary['evt_type'] == [] and inputFileSummary['nentries'] == 0:
        conditionsTag = inputFileSummary['conditions_tag']
        if conditionsTag.find('SIM') > 0:
            inputFileSummary['evt_type'] = ('IS_SIMULATION', 'IS_ATLAS', 'IS_PHYSICS')
        else:
            inputFileSummary['evt_type'] = ('IS_DATA', 'Unknown', 'Unknown')
        msg.warning("Input file has zero events and hence no EventInfo object. Guessed that evt_type=%s, but this is not certain. Using auto-configuration is not safe if this info is wrong." % (inputFileSummary['evt_type'][0]))

    # Final print out (DEBUG)
    msg.debug("inputFileSummary is:")
    msg.debug(str(inputFileSummary))
    return
class PoolMonTool(object):
    """Persistency monitoring tool: measures memory and disk sizes of
    input and output containers (in POOL files).
    """
    def __init__(self, svc):
        super(PoolMonTool, self).__init__()
        self.svc = svc
        self.name = svc.name + ".Pool"
        from AthenaCommon.AppMgr import ServiceMgr as svcMgr
        inFiles = set()
        if hasattr(svcMgr, 'EventSelector') and \
           hasattr(svcMgr.EventSelector, 'InputCollections'):
            for inFile in svcMgr.EventSelector.InputCollections:
                if inFile.startswith("ROOTTREE:"):
                    inFile = inFile[len("ROOTTREE:"):]
                inFiles.add(inFile)
        outFiles = set()
        from AthenaCommon import CfgMgr
        from AthenaCommon.Configurable import Configurable
        for c in Configurable.allConfigurables.values():
            if not isinstance(c, CfgMgr.AthenaOutputStream):
                continue
            try:
                outFile = c.properties()["OutputFile"]
            except KeyError:
                continue
            if outFile.startswith("ROOTTREE:"):
                outFile = outFile[len("ROOTTREE:"):]
            outFiles.add(outFile)
        self.inputPoolFiles = [i for i in inFiles]
        self.outputPoolFiles = [o for o in outFiles]

    @property
    def msg(self):
        import AthenaCommon.Logging as L
        return L.logging.getLogger(self.name)

    def initialize(self):
        self.msg.info("Initializing [%s]", self.name)
        self.msg.info("InputPoolFiles: %r", self.inputPoolFiles)
        self.msg.info("OutputPoolFiles: %r", self.outputPoolFiles)
        return

    def finalize(self):
        import sys
        import tempfile, atexit, os
        import PyUtils.PoolFile as PF

        class ShutUp:
            """mute/unmute stdout at the file-descriptor level"""
            def __init__(self):
                self.save = os.dup(sys.stdout.fileno())
                self.quiet = open("/dev/null", "w")
                return
            def mute(self):
                os.dup2(self.quiet.fileno(), sys.stdout.fileno())
                return
            def unMute(self):
                os.dup2(self.save, sys.stdout.fileno())
                return

        def _unfold(pr):
            return {
                'name':         pr.name,
                'memSize':      pr.memSize,
                'diskSize':     pr.diskSize,
                'memSizeNoZip': pr.memSizeNoZip,
                'nEntries':     pr.nEntries,
                'dirType':      pr.dirType,
                'details':      pr.details,
            }

        tmpfileno, tmpfile = tempfile.mkstemp(suffix='.py_shelve.dat')
        os.close(tmpfileno)
        atexit.register(os.unlink, tmpfile)

        from subprocess import getstatusoutput
        sc, checkfile = getstatusoutput('which checkFile.py')
        if sc != 0:
            self.msg.info('could not fetch checkFile.py !')
            self.msg.info('no POOL file post-processing')
            return
        checkfile = os.path.realpath(checkfile)

        def run_check_file(pool_file, dbfile, app=checkfile):
            from subprocess import call
            with open('/dev/null', 'w') as dev_null:
                res = call([app, "-f", pool_file, "-o", dbfile, "--fast"],
                           stdout=dev_null, stderr=dev_null)
            return res

        _msg = ShutUp()
        self.msg.info("Finalizing [%s]", self.name)
        ## FIXME: move to 'with' stmt when py2.5 is there for proper RAII
        ## and leaner code...
        self.inputPoolFiles = [f for f in self.inputPoolFiles]
        if len(self.inputPoolFiles) > 0:
            self.msg.info("Content of input POOL files:")
            _msg.mute()
            _msg.unMute()
            for i, inFileName in enumerate(self.inputPoolFiles[:20]):
                try:
                    _msg.mute()
                    sc = run_check_file(pool_file=inFileName, dbfile=tmpfile)
                    if sc != 0:
                        raise RuntimeError('error running check file (rc=%r)' % sc)
                    inFile = PF.PoolFile(tmpfile)
                    _msg.unMute()
                    self.svc.meta['inputPoolFiles/%i' % i] = {
                        'infos':      inFile._fileInfos,
                        'nbrEvts':    inFile.dataHeader.nEntries,
                        'dataHeader': [_unfold(inFile.dataHeader)],
                        'data':       [_unfold(p) for p in inFile.data]
                    }
                    self.msg.info(" - [%s] %8.3f kb (%i events)",
                                  inFile._fileInfos['name'],
                                  inFile._fileInfos['size'] / Units.kB,
                                  inFile.dataHeader.nEntries)
                    del inFile
                except Exception as err:
                    _msg.unMute()
                    self.msg.info("Could not run checkFile on [%s] !!", inFileName)
                    self.msg.info("Reason: %s", err)
                    if 'inFile' in dir():
                        del inFile
            _msg.unMute()

        if len(self.outputPoolFiles) > 0:
            self.msg.info("Content of output POOL files:")
            for i, outFileName in enumerate(self.outputPoolFiles):
                try:
                    _msg.mute()
                    sc = run_check_file(pool_file=outFileName, dbfile=tmpfile)
                    if sc != 0:
                        raise RuntimeError('error running check file (rc=%r)' % sc)
                    outFile = PF.PoolFile(tmpfile)
                    _msg.unMute()
                    self.svc.meta['outputPoolFiles/%i' % i] = {
                        'infos':      outFile._fileInfos,
                        'nbrEvts':    outFile.dataHeader.nEntries,
                        'dataHeader': [_unfold(outFile.dataHeader)],
                        'data':       [_unfold(p) for p in outFile.data]
                    }
                    self.msg.info(" - [%s] %8.3f kb (%i events)",
                                  outFile._fileInfos['name'],
                                  outFile._fileInfos['size'] / Units.kB,
                                  outFile.dataHeader.nEntries)
                    del outFile
                except Exception as err:
                    _msg.unMute()
                    self.msg.info("Could not run checkFile on [%s] !!", outFileName)
                    self.msg.info("Reason: %s", err)
                    if 'outFile' in dir():
                        del outFile
            _msg.unMute()
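# Hedged usage sketch: PoolMonTool only needs a 'svc' object exposing a
# .name string and a .meta dict. The stand-in service below is hypothetical;
# in practice the constructor also needs a configured Athena job to inspect
# (for the EventSelector and output streams).
class _StandInSvc:
    name = 'PerfMonSvc'
    meta = {}

tool = PoolMonTool(_StandInSvc())
tool.initialize()
# ... run the job ...
tool.finalize()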