def execute(self):
    """Run every enabled algorithm tool on each ntuple entry."""
    # Total number of entries available in the ntuple.
    entries = self.getEntries()
    # Refresh the progress bar about once per percent, never less than 1.
    onePercent = int(entries / 100)
    step = onePercent if onePercent > 0 else 1
    ### Loop over entries
    # NOTE(review): the loop bound uses self._entries while 'step' comes from
    # self.getEntries() -- presumably these agree; confirm.
    for entry in progressbar(range(self._entries), self._entries, step=step,
                             logger=self._logger,
                             prefix="Looping over entries "):
        # Stop once the requested maximum number of events is reached.
        if self.nov < entry:
            break
        # Load every branch value for this entry.
        self.getEntry(entry)
        for tool in self._algTools:
            if tool.status is StatusTool.DISABLE:
                continue
            if tool.execute().isFailure():
                self._logger.error('The tool %s return status code different of SUCCESS', tool.name)
            if tool.wtd is StatusWatchDog.ENABLE:
                self._logger.debug('Watchdog is true in %s. Skipp events', tool.name)
                # reset the watchdog since this was used
                tool.wtd = StatusWatchDog.DISABLE
                break
def tensor_frobenius_argmin(data, code_book, block_size=10000, logger=None):
    """Return, for each row of *data*, the index of the closest row of *code_book*.

    Closeness is the Euclidean (Frobenius) norm. See here:
    http://scipy.github.io/old-wiki/pages/EricsBroadcastingDoc
    The broadcast ``data[:, np.newaxis] - code_book`` consumes a lot of
    memory, so inputs larger than *block_size* rows are processed in blocks.

    :param data: 2D array of shape (nevents, nfeatures)
    :param code_book: 2D array of shape (ncodes, nfeatures)
    :param block_size: maximum number of rows handled in a single broadcast
    :param logger: optional logger forwarded to the progress bar
    :return: 1D array of length nevents with argmin indices into *code_book*
    """
    def _block_argmin(block):
        # Squared distances suffice: sqrt is strictly monotonic, so dropping
        # it leaves every argmin unchanged and saves a full-array pass.
        return np.argmin(
            np.sum(np.power(block[:, np.newaxis] - code_book, 2), axis=-1),
            axis=1)

    nevents = data.shape[0]
    if nevents <= block_size:
        return _block_argmin(data)
    from RingerCore import progressbar
    # Bug fix: use floor division so np.split receives an integer section
    # count (plain '/' yields a float under Python 3 division semantics,
    # which makes np.split raise).
    nblocks = nevents // block_size
    remainder = nevents % block_size
    d = np.zeros((nevents,))
    index_split = np.split(np.array(range(nevents - remainder)), nblocks)
    index_remainder = range(nevents - remainder, nevents)
    # Add block index
    for index in progressbar(index_split, len(index_split), step=1,
                             logger=logger,
                             prefix="Looping over memorys block "):
        d[index] = _block_argmin(data[index, :])
    # Add remaider events (the tail that did not fill a whole block)
    d[index_remainder] = _block_argmin(data[index_remainder, :])
    return d
def cat_files_py(flist, ofile, op, logger=None, level=None):
    """Concatenate *flist* into *ofile* using pure python.

    Taken from: https://gist.github.com/dimo414/2993381
    """
    op = WriteMethod.retrieve(op)
    if not isinstance(flist, (list, tuple)):
        flist = [flist]
    from RingerCore.Logger import LoggingLevel
    if level is None:
        level = LoggingLevel.INFO
    from RingerCore.util import progressbar
    # Only display the bar when a logger was handed in.
    show = logger is not None
    with open(ofile, 'wb') as out:
        for fname in progressbar(flist, len(flist), prefix="Merging: ",
                                 disp=show, step=10, logger=logger,
                                 level=level):
            with open(fname, 'rb') as fin:
                if op is WriteMethod.Readlines:
                    out.writelines(fin.readlines())
                elif op is WriteMethod.Read:
                    out.write(fin.read())
                elif op is WriteMethod.ShUtil:
                    import shutil
                    shutil.copyfileobj(fin, out)
# NOTE(review): this method was ingested with its line structure collapsed;
# the code below is kept byte-identical pending a formatting-only cleanup.
# From the visible calls it builds a ROOT.TChain over the input files, wires
# SkimmedNtuple branch addresses, loops entries applying the reference/filter
# selection, fills ring/track pattern arrays and efficiency collectors, and
# returns (npObject, npBaseInfo[, branchEffCollectors, branchCrossEffCollectors]).
def __call__(self, fList, ringerOperation, **kw): """ Read ntuple and return patterns and efficiencies. Arguments: - fList: The file path or file list path. It can be an argument list of two types: o List: each element is a string path to the file; o Comma separated string: each path is separated via a comma o Folders: Expand folders recursively adding also files within them to analysis - ringerOperation: Set Operation type. It can be both a string or the RingerOperation Optional arguments: - filterType [None]: whether to filter. Use FilterType enumeration - reference [Truth]: set reference for targets. Use Reference enumeration - treePath [Set using operation]: set tree name on file, this may be set to use different sources then the default. Default for: o Offline: Offline/Egamma/Ntuple/electron o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger - offEtCut [None]: Set Offline cluster energy cut value - nClusters [None]: Read up to nClusters. Use None to run for all clusters. - getRatesOnly [False]: Read up to nClusters. Use None to run for all clusters. - etBins [None]: E_T bins (GeV) where the data should be segmented - etaBins [None]: eta bins where the data should be segmented - ringConfig [100]: A list containing the number of rings available in the data for each eta bin. - crossVal [None]: Whether to measure benchmark efficiency splitting it by the crossVal-validation datasets - extractDet [None]: Which detector to export (use Detector enumeration). Defaults are: o L2Calo: Calorimetry o L2: Tracking o Offline: Calorimetry o Others: CaloAndTrack - standardCaloVariables [False]: Whether to extract standard track variables. - useTRT [False]: Whether to export TRT information when dumping track variables. 
- supportTriggers [True]: Whether reading data comes from support triggers """ __eventBranches = [ 'EventNumber', 'RunNumber', 'RandomRunNumber', 'MCChannelNumber', 'RandomLumiBlockNumber', 'MCPileupWeight', 'VertexZPosition', 'Zcand_M', 'Zcand_pt', 'Zcand_eta', 'Zcand_phi', 'Zcand_y', 'isTagTag' ] __trackBranches = [ 'elCand2_deltaeta1', 'elCand2_DeltaPOverP', 'elCand2_deltaphiRescaled', 'elCand2_d0significance', 'elCand2_trackd0pvunbiased', 'elCand2_eProbabilityHT' ] __monteCarloBranches = [ 'type', 'origin', 'originbkg', 'typebkg', 'isTruthElectronFromZ', 'TruthParticlePdgId', 'firstEgMotherPdgId', 'TruthParticleBarcode', 'firstEgMotherBarcode', 'MotherPdgId', 'MotherBarcode', 'FirstEgMotherTyp', 'FirstEgMotherOrigin', 'dRPdgId', ] __onlineBranches = ['match', 'ringerMatch', 'ringer_rings'] __offlineBranches = ['et', 'eta'] # The current pid map used as offline reference pidConfigs = { key: value for key, value in RingerOperation.efficiencyBranches().iteritems() if key in (RingerOperation.Offline_LH_Tight, RingerOperation.Offline_LH_Medium, RingerOperation.Offline_LH_Loose, RingerOperation.Offline_LH_VeryLoose) } # Retrieve information from keyword arguments filterType = retrieve_kw(kw, 'filterType', FilterType.DoNotFilter) reference = retrieve_kw(kw, 'reference', Reference.AcceptAll) offEtCut = retrieve_kw(kw, 'offEtCut', None) l2EtCut = retrieve_kw(kw, 'l2EtCut', None) treePath = retrieve_kw(kw, 'treePath', 'ZeeCandidate') nClusters = retrieve_kw(kw, 'nClusters', None) etBins = retrieve_kw(kw, 'etBins', None) etaBins = retrieve_kw(kw, 'etaBins', None) crossVal = retrieve_kw(kw, 'crossVal', None) ringConfig = retrieve_kw(kw, 'ringConfig', 100) monitoring = retrieve_kw(kw, 'monitoring', None) pileupRef = retrieve_kw(kw, 'pileupRef', NotSet) getRates = retrieve_kw(kw, 'getRates', True) getRatesOnly = retrieve_kw(kw, 'getRatesOnly', False) getTagsOnly = retrieve_kw(kw, 'getTagsOnly', False) extractDet = retrieve_kw(kw, 'extractDet', None) import ROOT 
#gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C"); #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C') if ROOT.gSystem.Load('libTuningTools') < 0: self._fatal("Could not load TuningTools library", ImportError) if 'level' in kw: self.level = kw.pop('level') # and delete it to avoid mistakes: checkForUnusedVars(kw, self._warning) del kw ### Parse arguments # Also parse operation, check if its type is string and if we can # transform it to the known operation enum: fList = csvStr2List(fList) fList = expandFolders(fList) ringerOperation = RingerOperation.retrieve(ringerOperation) reference = Reference.retrieve(reference) # Offline E_T cut if offEtCut: offEtCut = 1000. * offEtCut # Put energy in MeV # Check whether using bins useBins = False useEtBins = False useEtaBins = False nEtaBins = 1 nEtBins = 1 if etaBins is None: etaBins = npCurrent.fp_array([]) if type(etaBins) is list: etaBins = npCurrent.fp_array(etaBins) if etBins is None: etBins = npCurrent.fp_array([]) if type(etBins) is list: etBins = npCurrent.fp_array(etBins) if etBins.size: etBins = etBins * 1000. # Put energy in MeV nEtBins = len(etBins) - 1 if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max: self._fatal(( 'Number of et bins (%d) is larger or equal than maximum ' 'integer precision can hold (%d). Increase ' 'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.' 
), nEtBins, np.iinfo(npCurrent.scounter_dtype).max) # Flag that we are separating data through bins useBins = True useEtBins = True self._debug('E_T bins enabled.') if not type(ringConfig) is list and not type(ringConfig) is np.ndarray: ringConfig = [ringConfig] * (len(etaBins) - 1) if etaBins.size else 1 if type(ringConfig) is list: ringConfig = npCurrent.int_array(ringConfig) if not len(ringConfig): self._fatal('Rings size must be specified.') if etaBins.size: nEtaBins = len(etaBins) - 1 if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max: self._fatal(( 'Number of eta bins (%d) is larger or equal than maximum ' 'integer precision can hold (%d). Increase ' 'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.' ), nEtaBins, np.iinfo(npCurrent.scounter_dtype).max) if len(ringConfig) != nEtaBins: self._fatal(( 'The number of rings configurations (%r) must be equal than ' 'eta bins (%r) region config'), ringConfig, etaBins) useBins = True useEtaBins = True self._debug('eta bins enabled.') else: self._debug('eta/et bins disabled.') # The base information holder, such as et, eta and pile-up if pileupRef is NotSet: if ringerOperation > 0: pileupRef = PileupReference.avgmu else: pileupRef = PileupReference.nvtx pileupRef = PileupReference.retrieve(pileupRef) self._info("Using '%s' as pile-up reference.", PileupReference.tostring(pileupRef)) # Candidates: (1) is tags and (2) is probes. Default is probes self._candIdx = 1 if getTagsOnly else 2 # Mutual exclusive arguments: if not getRates and getRatesOnly: self._logger.error( "Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True." 
) getRates = True ### Prepare to loop: t = ROOT.TChain(treePath) for inputFile in progressbar(fList, len(fList), logger=self._logger, prefix="Creating collection tree "): # Check if file exists f = ROOT.TFile.Open(inputFile, 'read') if not f or f.IsZombie(): self._warning('Couldn' 't open file: %s', inputFile) continue # Inform user whether TTree exists, and which options are available: self._debug("Adding file: %s", inputFile) obj = f.Get(treePath) if not obj: self._warning("Couldn't retrieve TTree (%s)!", treePath) self._info("File available info:") f.ReadAll() f.ReadKeys() f.ls() continue elif not isinstance(obj, ROOT.TTree): self._fatal("%s is not an instance of TTree!", treePath, ValueError) t.Add(inputFile) # Turn all branches off. t.SetBranchStatus("*", False) # RingerPhysVal hold the address of required branches event = ROOT.SkimmedNtuple() # Ready to retrieve the total number of events t.GetEntry(0) ## Allocating memory for the number of entries entries = t.GetEntries() nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \ else nClusters ## Retrieve the dependent operation variables: if useEtBins: etBranch = ('elCand%d_et') % ( self._candIdx) if ringerOperation < 0 else ('fcCand%d_et') % ( self._candIdx) self.__setBranchAddress(t, etBranch, event) self._debug("Added branch: %s", etBranch) npEt = npCurrent.scounter_zeros( shape=npCurrent.shape(npat=1, nobs=nobs)) self._debug("Allocated npEt with size %r", npEt.shape) if useEtaBins: etaBranch = ('elCand%d_eta') % ( self._candIdx) if ringerOperation < 0 else ('fcCand%d_eta') % ( self._candIdx) self.__setBranchAddress(t, etaBranch, event) self._debug("Added branch: %s", etaBranch) npEta = npCurrent.scounter_zeros( shape=npCurrent.shape(npat=1, nobs=nobs)) self._debug("Allocated npEta with size %r", npEta.shape) if reference is Reference.Truth: self.__setBranchAddress(t, ('elCand%d_isTruthElectronFromZ') % (self._candIdx), event) for var in __offlineBranches: self.__setBranchAddress(t, 
('elCand%d_%s') % (self._candIdx, var), event) #for var in pidConfigs.values(): # self.__setBranchAddress(t,var,event) for var in __trackBranches: self.__setBranchAddress(t, var, event) # Add online branches if using Trigger if ringerOperation > 0: for var in __onlineBranches: self.__setBranchAddress(t, ('fcCand%d_%s') % (self._candIdx, var), event) else: self.__setBranchAddress(t, ('elCand%d_%s') % (self._candIdx, 'ringer_rings'), event) if pileupRef is PileupReference.nvtx: pileupBranch = 'Nvtx' pileupDataType = np.uint16 elif pileupRef is PileupReference.avgmu: pileupBranch = 'averageIntPerXing' pileupDataType = np.float32 else: raise NotImplementedError( "Pile-up reference %r is not implemented." % pileupRef) #for var in __eventBranches + for var in [pileupBranch]: self.__setBranchAddress(t, var, event) ### Allocate memory if extractDet == (Detector.Calorimetry): npat = ringConfig.max() elif extractDet == (Detector.Tracking): npat = len(__trackBranches) # NOTE: Check if pat is correct for both Calo and track data elif extractDet in (Detector.CaloAndTrack, Detector.All): npat = ringConfig.max() + len(__trackBranches) npPatterns = npCurrent.fp_zeros(shape=npCurrent.shape( npat=npat, #getattr(event, ringerBranch).size() nobs=nobs)) self._debug("Allocated npPatterns with size %r", npPatterns.shape) baseInfoBranch = BaseInfo( (etBranch, etaBranch, pileupBranch), (npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType)) baseInfo = [ None, ] * baseInfoBranch.nInfo # Add E_T, eta and luminosity information npBaseInfo = [ npCurrent.zeros(shape=npCurrent.shape(npat=1, nobs=nobs), dtype=baseInfoBranch.dtype(idx)) for idx in baseInfoBranch ] from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector branchEffCollectors = OrderedDict() branchCrossEffCollectors = OrderedDict() if ringerOperation < 0: from operator import itemgetter benchmarkDict = OrderedDict( sorted([(key, value) for key, value in RingerOperation.efficiencyBranches().iteritems() if key < 
0 and not (isinstance(value, (list, tuple)))], key=itemgetter(0))) else: benchmarkDict = OrderedDict() for key, val in benchmarkDict.iteritems(): branchEffCollectors[key] = list() branchCrossEffCollectors[key] = list() # Add efficincy branch: if ringerOperation < 0: self.__setBranchAddress(t, val, event) for etBin in range(nEtBins): if useBins: branchEffCollectors[key].append(list()) branchCrossEffCollectors[key].append(list()) for etaBin in range(nEtaBins): etBinArg = etBin if useBins else -1 etaBinArg = etaBin if useBins else -1 argList = [ RingerOperation.tostring(key), val, etBinArg, etaBinArg ] branchEffCollectors[key][etBin].append( BranchEffCollector(*argList)) if crossVal: branchCrossEffCollectors[key][etBin].append( BranchCrossEffCollector(entries, crossVal, *argList)) # etBin # etaBin # benchmark dict if self._logger.isEnabledFor(LoggingLevel.DEBUG): self._debug( 'Retrieved following branch efficiency collectors: %r', [ collector[0].printName for collector in traverse(branchEffCollectors.values()) ]) etaBin = 0 etBin = 0 step = int(entries / 100) if int(entries / 100) > 0 else 1 ## Start loop! self._info("There is available a total of %d entries.", entries) cPos = 0 ### Loop over entries for entry in progressbar(range(entries), entries, step=step, logger=self._logger, prefix="Looping over entries "): self._verbose('Processing eventNumber: %d/%d', entry, entries) t.GetEntry(entry) #print self.__getEt(event) if event.elCand2_et < offEtCut: self._debug( "Ignoring entry due to offline E_T cut. 
E_T = %1.3f < %1.3f MeV", event.elCand2_et, offEtCut) continue # Add et distribution for all events if ringerOperation > 0: if event.fcCand2_et < l2EtCut: self._debug("Ignoring entry due Fast Calo E_T cut.") continue # Add et distribution for all events # Set discriminator target: target = Target.Unknown # Monte Carlo cuts if reference is Reference.Truth: if getattr(event, ('elCand%d_isTruthElectronFromZ') % (self._candIdx)): target = Target.Signal elif not getattr(event, ('elCand%d_isTruthElectronFromZ') % (self._candIdx)): target = Target.Background # Offline Likelihood cuts elif reference is Reference.Off_Likelihood: if getattr(event, pidConfigs[RingerOperation.Offline_LH_Tight]): target = Target.Signal elif not getattr( event, pidConfigs[RingerOperation.Offline_LH_VeryLoose]): target = Target.Background # By pass everything (Default) elif reference is Reference.AcceptAll: target = Target.Signal if filterType is FilterType.Signal else Target.Background # Run filter if it is defined if filterType and \ ( (filterType is FilterType.Signal and target != Target.Signal) or \ (filterType is FilterType.Background and target != Target.Background) or \ (target == Target.Unknown) ): #self._verbose("Ignoring entry due to filter cut.") continue ## Retrieve base information and rings: for idx in baseInfoBranch: lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx)) baseInfo[idx] = lInfo # Retrieve dependent operation region if useEtBins: etBin = self.__retrieveBinIdx(etBins, baseInfo[0]) if useEtaBins: etaBin = self.__retrieveBinIdx(etaBins, np.fabs(baseInfo[1])) # Check if bin is within range (when not using bins, this will always be true): if (etBin < nEtBins and etaBin < nEtaBins): if useEtBins: npEt[cPos] = etBin if useEtaBins: npEta[cPos] = etaBin # Online operation cPat = 0 caloAvailable = True if ringerOperation > 0 and self.__get_ringer_onMatch( event) < 1: continue # TODO Treat case where we don't use rings energy # Check if the rings empty if 
self.__get_rings_energy(event, ringerOperation).empty(): self._debug( 'No rings available in this event. Skipping...') caloAvailable = False # Retrieve rings: if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack, Detector.All): if caloAvailable: try: pass patterns = stdvector_to_list( self.__get_rings_energy( event, ringerOperation)) lPat = len(patterns) if lPat == ringConfig[etaBin]: npPatterns[npCurrent.access( pidx=slice(cPat, ringConfig[etaBin]), oidx=cPos)] = patterns else: oldEtaBin = etaBin if etaBin > 0 and ringConfig[etaBin - 1] == lPat: etaBin -= 1 elif etaBin + 1 < len( ringConfig) and ringConfig[etaBin + 1] == lPat: etaBin += 1 npPatterns[npCurrent.access( pidx=slice(cPat, ringConfig[etaBin]), oidx=cPos)] = patterns self._warning(( "Recovered event which should be within eta bin (%d: %r) " "but was found to be within eta bin (%d: %r). " "Its read eta value was of %f."), oldEtaBin, etaBins[oldEtaBin:oldEtaBin + 2], etaBin, etaBins[etaBin:etaBin + 2], np.fabs(getattr( event, etaBranch))) except ValueError: self._logger.error(( "Patterns size (%d) do not match expected " "value (%d). This event eta value is: %f, and ringConfig is %r." ), lPat, ringConfig[etaBin], np.fabs( getattr(event, etaBranch)), ringConfig) continue cPat += ringConfig[etaBin] else: # Also display warning when extracting only calorimetry! 
self._warning("Rings not available") continue if extractDet in (Detector.Tracking, Detector.CaloAndTrack, Detector.All): for var in __trackBranches: npPatterns[npCurrent.access(pidx=cPat, oidx=cPos)] = getattr( event, var) if var == 'elCand2_eProbabilityHT': from math import log TRT_PID = npPatterns[npCurrent.access(pidx=cPat, oidx=cPos)] epsilon = 1e-99 if TRT_PID >= 1.0: TRT_PID = 1.0 - 1.e-15 elif TRT_PID <= 0.0: TRT_PID = epsilon tau = 15.0 TRT_PID = -(1 / tau) * log((1.0 / TRT_PID) - 1.0) npPatterns[npCurrent.access(pidx=cPat, oidx=cPos)] = TRT_PID cPat += 1 ## Retrieve rates information: if getRates and ringerOperation < 0: #event.elCand2_isEMVerLoose2015 = not( event.elCand2_isEMVeryLoose2015 & 34896 ) event.elCand2_isEMLoose2015 = not ( event.elCand2_isEMLoose2015 & 34896) event.elCand2_isEMMedium2015 = not ( event.elCand2_isEMMedium2015 & 276858960) event.elCand2_isEMTight2015 = not ( event.elCand2_isEMTight2015 & 281053264) for branch in branchEffCollectors.itervalues(): if not useBins: branch.update(event) else: branch[etBin][etaBin].update(event) if crossVal: for branchCross in branchCrossEffCollectors.itervalues( ): if not useBins: branchCross.update(event) else: branchCross[etBin][etaBin].update(event) # end of (getRates) if not monitoring is None: self.__fillHistograms(monitoring, filterType, pileupRef, pidConfigs, event) # We only increment if this cluster will be computed cPos += 1 # end of (et/eta bins) # Limit the number of entries to nClusters if desired and possible: if not nClusters is None and cPos >= nClusters: break # for end ## Treat the rings information ## Remove not filled reserved memory space: if npPatterns.shape[npCurrent.odim] > cPos: npPatterns = np.delete(npPatterns, slice(cPos, None), axis=npCurrent.odim) ## Segment data over bins regions: # Also remove not filled reserved memory space: if useEtBins: npEt = npCurrent.delete(npEt, slice(cPos, None)) if useEtaBins: npEta = npCurrent.delete(npEta, slice(cPos, None)) # Treat 
standardCaloVariables = False npObject = self.treatNpInfo( cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins, nEtaBins, standardCaloVariables, ringConfig, npPatterns, ) data = [ self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins, nEtaBins, standardCaloVariables, ringConfig, npData) for npData in npBaseInfo ] npBaseInfo = npCurrent.array(data, dtype=np.object) if getRates: if crossVal: for etBin in range(nEtBins): for etaBin in range(nEtaBins): for branchCross in branchCrossEffCollectors.itervalues( ): if not useBins: branchCross.finished() else: branchCross[etBin][etaBin].finished() # Print efficiency for each one for the efficiency branches analysed: for etBin in range(nEtBins) if useBins else range(1): for etaBin in range(nEtaBins) if useBins else range(1): for branch in branchEffCollectors.itervalues(): lBranch = branch if not useBins else branch[etBin][ etaBin] self._info('%s', lBranch) if crossVal: for branchCross in branchCrossEffCollectors.itervalues( ): lBranchCross = branchCross if not useBins else branchCross[ etBin][etaBin] lBranchCross.dump(self._debug, printSort=True, sortFcn=self._verbose) # for branch # for eta # for et else: branchEffCollectors = None branchCrossEffCollectors = None # end of (getRates) outputs = [] outputs.extend((npObject, npBaseInfo)) if getRates: outputs.extend((branchEffCollectors, branchCrossEffCollectors)) return outputs
## Retrieve parser args: args = parser.parse_args(namespace=LoggerNamespace()) mainLogger.setLevel(args.output_level) if mainLogger.isEnabledFor(LoggingLevel.DEBUG): from pprint import pprint pprint(args.inputFiles) ## Treat special arguments if len(args.inputFiles) == 1: args.inputFiles = csvStr2List(args.inputFiles[0]) args.inputFiles = expandFolders(args.inputFiles) mainLogger.verbose("All input files are:") if mainLogger.isEnabledFor(LoggingLevel.VERBOSE): pprint(args.inputFiles) for inFile in progressbar(args.inputFiles, len(args.inputFiles), logger=mainLogger, prefix="Processing files "): # Treat output file name: from RingerCore import checkExtension, changeExtension, load, save if checkExtension(inFile, "tgz|tar.gz|pic"): cOutputName = changeExtension(inFile, '.mat') if args.change_output_folder: import os.path cOutputName = os.path.join( os.path.abspath(args.change_output_folder), os.path.basename(cOutputName)) data = load(inFile, useHighLevelObj=False) from scipy.io import savemat try: savemat(cOutputName, data) except ImportError:
cOutputName = args.outputFile cOutputName = ensureExtension( cOutputName, 'tgz|tar.gz' ) if m: file_format = m.group(1) wantedFormat = re.compile(r'.*\.' + file_format + r'(\.[0-9]*)?$') isSame = [ bool(wantedFormat.match(filename)) for filename in fileCollection ] from RingerCore.util import cat_files_py if all(isSame): if file_format in ("tgz", "tar.gz"): cat_files_py( fileCollection, cOutputName, args.writeMethod, mainLogger ) elif file_format == "pic": if BooleanStr.retrieve(args.allowTmpFiles): import tempfile tmpFolderPath=tempfile.mkdtemp() for inputFile in progressbar(fileCollection, len(fileCollection), prefix="Compressing: ", disp = True if mainLogger is not None else False, step = 10, logger = mainLogger, level = LoggingLevel.INFO ): import subprocess import os.path lFile = os.path.split(inputFile)[-1] subprocess.Popen(['tar', '-czf', tmpFolderPath + '/' + ensureExtension( lFile, '.tgz|.tar.gz'), os.path.relpath(inputFile)]) cat_files_py( expandFolders( tmpFolderPath ), cOutputName, args.writeMethod, mainLogger ) import shutil shutil.rmtree(tmpFolderPath) else: import tarfile with tarfile.open(cOutputName, "w:gz") as tar: for inputFile in progressbar(fileCollection, len(fileCollection), prefix="Merging: ", disp = True if mainLogger is not None else False, step = 10,
if mainLogger.isEnabledFor(LoggingLevel.DEBUG): from pprint import pprint pprint(args.inputFiles) import ROOT, numpy as np ## Treat special arguments if len(args.inputFiles) == 1: args.inputFiles = csvStr2List(args.inputFiles[0]) args.inputFiles = expandFolders(args.inputFiles) mainLogger.verbose("All input files are:") if mainLogger.isEnabledFor(LoggingLevel.VERBOSE): pprint(args.inputFiles) for idx, inFile in progressbar(enumerate(args.inputFiles), len(args.inputFiles), logger=mainLogger, prefix="Processing files "): # Treat output file name: from RingerCore import checkExtension, changeExtension, save, ensureExtension if checkExtension(inFile, "root"): cOutputName = ensureExtension( args.outputFiles[idx] if args.outputFiles and idx < len(args.outputFiles) else changeExtension( inFile, '.npz'), '.npz') if args.change_output_folder: import os.path cOutputName = os.path.join( os.path.abspath(args.change_output_folder), os.path.basename(cOutputName)) f = ROOT.TFile(inFile, 'r') mainLogger.debug("Reading key: %s", args.treePath)
## Retrieve parser args:
args = parser.parse_args(namespace=LoggerNamespace())
mainLogger.setLevel(args.output_level)
if mainLogger.isEnabledFor(LoggingLevel.DEBUG):
    from pprint import pprint
    pprint(args.inputFiles)
## Treat special arguments
if len(args.inputFiles) == 1:
    args.inputFiles = csvStr2List(args.inputFiles[0])
args.inputFiles = expandFolders(args.inputFiles)
mainLogger.verbose("All input files are:")
if mainLogger.isEnabledFor(LoggingLevel.VERBOSE):
    # Bug fix: import locally so this branch does not depend on the DEBUG
    # branch above having run (pprint would otherwise be unbound here).
    from pprint import pprint
    pprint(args.inputFiles)
for inFile in progressbar(args.inputFiles, len(args.inputFiles),
                          logger=mainLogger, prefix="Processing files "):
    # Treat output file name:
    from RingerCore import checkExtension, changeExtension, load, save
    if checkExtension(inFile, "tgz|tar.gz|pic"):
        cOutputName = changeExtension(inFile, '.mat')
        if args.change_output_folder:
            import os.path
            cOutputName = os.path.join(
                os.path.abspath(args.change_output_folder),
                os.path.basename(cOutputName))
        data = load(inFile, useHighLevelObj=False)
        try:
            # Bug fix: the ImportError this handler guards against is raised
            # by the scipy import itself, so the import must be inside the
            # try block (it previously sat above it, making the handler dead).
            from scipy.io import savemat
            savemat(cOutputName, data)
        except ImportError:
            # Bug fix: this runs at script level, where 'self' is undefined;
            # report through the module-level logger instead.
            mainLogger.fatal(("Cannot save matlab file, it seems that scipy is not "
                              "available."), ImportError)
        mainLogger.info("Successfully created matlab file: %s", cOutputName)
wantedFormat = re.compile(r'.*\.' + file_format + r'(\.[0-9]*)?$') isSame = [ bool(wantedFormat.match(filename)) for filename in fileCollection ] if all(isSame): if file_format in ("tgz", "tar.gz"): cat_files_py(fileCollection, cOutputName, args.writeMethod, mainLogger) elif file_format == "pic": if BooleanStr.retrieve(args.allowTmpFiles): import tempfile tmpFolderPath = tempfile.mkdtemp() for inputFile in progressbar( fileCollection, len(fileCollection), prefix="Compressing: ", disp=True if mainLogger is not None else False, step=10, logger=mainLogger, level=LoggingLevel.INFO): import subprocess import os.path lFile = os.path.split(inputFile)[-1] subprocess.Popen([ 'tar', '-czf', tmpFolderPath + '/' + ensureExtension(lFile, '.tgz|.tar.gz'), os.path.relpath(inputFile) ]) cat_files_py(expandFolders(tmpFolderPath), cOutputName, args.writeMethod, mainLogger) import shutil shutil.rmtree(tmpFolderPath)
# Script: split a tuning-data file into one output file per (et, eta) bin and
# calorimeter layer, applying a RingerLayerSegmentation pre-processing chain
# to every 'Patterns' entry before saving.
from RingerCore import load, save
from RingerCore import changeExtension, ensureExtension, appendToFileName, progressbar, mkdir_p
from itertools import product
import numpy as np
# Default the output path to the input file's directory.
if args.outputPath is None:
    args.outputPath = os.path.dirname(args.inputFile)
# NOTE(review): nesting reconstructed from collapsed source -- presumably the
# directory check applies regardless of whether the default was used; confirm.
if not os.path.isdir(args.outputPath):
    mkdir_p(args.outputPath)
f = load(args.inputFile)
# Copy all metada information (everything that is not bin-specific)
baseDict = {k: f[k] for k in f.keys() if not '_etBin_' in k and not '_etaBin_' in k}
nEtBins = f['nEtBins'].item()
nEtaBins = f['nEtaBins'].item()
for etIdx, etaIdx in progressbar(product(xrange(nEtBins), xrange(nEtaBins)),
                                 nEtBins * nEtaBins,
                                 logger=mainLogger,
                                 prefix='Juicing file '):
    # Collect the entries belonging to this (et, eta) bin and re-attach the
    # shared metadata.
    binDict = {k: f[k] for k in f.keys() if 'etBin_%d_etaBin_%d' % (etIdx, etaIdx) in k}
    binDict.update(baseDict)
    from copy import deepcopy
    for layer in caloLayers:
        # One segmentation chain per calorimeter layer.
        pp = PreProcChain([RingerLayerSegmentation(layer=layer)])
        # Deep-copy so each layer's output starts from the untouched bin data.
        tmpBinDict = deepcopy(binDict)
        for key in binDict.keys():
            if 'Patterns' in key:
                tmpBinDict[key] = pp(binDict[key])
        # Output name: '<input with calo -> calo<LAYER>>_et<i>_eta<j>'.
        outFile = os.path.join(
            args.outputPath,
            os.path.basename(appendToFileName(
                args.inputFile.replace('calo', 'calo' + RingerLayer.tostring(layer)),
                'et%d_eta%d' % (etIdx, etaIdx))))
        save(tmpBinDict, outFile, protocol='savez_compressed')
def plot(self, **kw):
  """
  Build all Data/MC comparison plots (shower shapes, per-ring
  distributions, ring mean-energy profile and neural-network outputs)
  and assemble them into a beamer PDF report.

  Keyword arguments (all optional):
    dirname   : output sub-directory name (default 'Distribution')
    basecolor : ROOT fill color for the MC histograms (default kRed-7)
    pdftitle  : title of the generated beamer report
    pdfoutput : output file name of the beamer report

  Returns StatusCode.SUCCESS.
  """
  from ROOT import kRed
  dirname = retrieve_kw(kw, 'dirname', 'Distribution')
  basecolor = retrieve_kw(kw, 'basecolor', kRed - 7)
  pdftitle = retrieve_kw(kw, 'pdftitle', 'Distributions')
  pdfoutput = retrieve_kw(kw, 'pdfoutput', 'distributions')
  import os
  # Organize outputs (.py and .pdf)
  prefix = self._basepath.split('/')[-1]
  localpath = os.getcwd() + '/' + dirname + '/' + prefix
  try:
    if not os.path.exists(localpath):
      os.makedirs(localpath)
  except:
    # NOTE(review): bare except assumes the only failure mode is "already
    # exists"; it also hides e.g. permission errors.
    self._logger.warning('The director %s exist.', localpath)
  hist_names = ['et', 'eta', 'mu', 'nvtx', 'reta', 'eratio', 'weta2', 'rhad', 'rphi', 'f1', 'f3']
  hist_labels = ['E_{T}', "#eta", "<#mu>", 'N_{vtx}', 'R_{eta}', 'E_{ratio}', 'W_{eta2}', 'R_{had}', 'R_{phi}', 'f_{1}', 'f_{3}']
  from ROOT import TCanvas, TH1F, gStyle, TLegend, TPad
  from ROOT import kGreen, kRed, kBlue, kBlack, kGray, gPad, kAzure
  from TrigEgammaDevelopments.plots.AtlasStyle import AtlasStyle, atlas_template, setLegend1
  canvas1 = TCanvas('canvas1', 'canvas1', 2500, 1600)
  canvas1.Divide(4, 3)

  # Concatenate distributions for all regions
  def sumAllRegions(histname):
    # Sum histogram `histname` over every (et, eta) bin under the current
    # store directory; returns the accumulated clone (or None if no bins).
    h = None
    for etBinIdx in range(len(self._etBins) - 1):
      for etaBinIdx in range(len(self._etaBins) - 1):
        binningname = ('et%d_eta%d') % (etBinIdx, etaBinIdx)
        path = self._basepath + '/' + self.currentDir() + '/' + binningname
        if h:
          h += self.storeSvc().histogram(path + '/' + histname)
        else:
          # First bin: clone so the stored histogram is not modified.
          h = self.storeSvc().histogram(path + '/' + histname).Clone()
    return h

  collector = []
  figures = {'rings': [], 'rnnOutput': [], 'ringer_profile': str(), 'shower_shapes': str()}
  """ Plot all shower shapes distributins """
  for idx, histname in enumerate(hist_names):
    self.setDir('Data')
    h_data = sumAllRegions(histname)
    self.setDir('MonteCarlo')
    h_mc = sumAllRegions(histname)
    #h_mc, h_data = self.__scale_histograms(h_mc, h_data, 100, 0.01, 0.01)
    pad = canvas1.cd(idx + 1)
    gStyle.SetOptStat(110011)
    collector.append(pad)
    h_mc.SetFillColor(basecolor)
    h_mc.SetLineColor(basecolor)
    h_data.SetLineColor(kBlack)
    # Normalize both histograms to unit maximum so the shapes compare.
    h_mc.Scale(1. / h_mc.GetMaximum())
    h_data.Scale(1. / h_data.GetMaximum())
    h_mc.Draw()
    h_data.Draw('same')
    leg1 = TLegend(0.2, 0.75, 0.5, 0.95)
    setLegend1(leg1)
    leg1.AddEntry(h_mc, 'MC')
    leg1.AddEntry(h_data, 'Data')
    leg1.Draw()
    collector[-1].Update()
    # Keep ROOT objects referenced so python GC does not delete them
    # before the canvas is saved.
    collector.append(h_mc)
    collector.append(h_data)
    collector.append(leg1)
  canvas1.SaveAs(localpath + '/shower_shapes_distributions.pdf')
  figures['shower_shapes'] = localpath + '/shower_shapes_distributions.pdf'
  """ Plot all shower ringer shapes for each ring """
  ratio_size_as_fraction = 0.35
  from RingerCore import progressbar
  rings_localpath = []
  for r in progressbar(range(100), 100, step=1, logger=self._logger, prefix="Looping over rings (Plotting...) "):
    # One canvas per ring: top pad holds the distributions, bottom pad
    # holds the Data/MC ratio.
    canvas2 = TCanvas('canvas2', 'canvas2', 2500, 1600)
    drawopt = 'pE1'
    canvas2.cd()
    top = TPad("pad_top", "This is the top pad", 0.0, ratio_size_as_fraction, 1.0, 1.0)
    top.SetBottomMargin(0.0)
    top.SetBottomMargin(0.06 / float(top.GetHNDC()))
    #top.SetTopMargin (0.04/float(top.GetHNDC()))
    top.SetRightMargin(0.05)
    top.SetLeftMargin(0.16)
    top.SetFillColor(0)
    top.Draw(drawopt)
    canvas2.cd()
    bot = TPad("pad_bot", "This is the bottom pad", 0.0, 0.0, 1.0, ratio_size_as_fraction)
    bot.SetBottomMargin(0.10 / float(bot.GetHNDC()))
    #bot.SetTopMargin (0.02/float(bot.GetHNDC()))
    bot.SetTopMargin(0.0)
    bot.SetRightMargin(0.05)
    bot.SetLeftMargin(0.16)
    bot.SetFillColor(0)
    bot.Draw(drawopt)
    self.setDir('MonteCarlo')
    h_mc = sumAllRegions('rings/ring_' + str(r))
    self.setDir('Data')
    h_data = sumAllRegions('rings/ring_' + str(r))
    gStyle.SetOptStat(000000)
    h_mc, h_data = self.__scale_histograms(h_mc, h_data, 100, 0.0001, 0.025)
    h_mc.Scale(1. / h_mc.GetMaximum())
    h_data.Scale(1. / h_data.GetMaximum())
    from ROOT import TH1, kGray
    divide = ""
    drawopt = 'pE1'
    bot.cd()
    # Build the Data/MC ratio plot in the bottom pad.
    ref = h_mc.Clone()
    h = h_data.Clone()
    ref.Sumw2()
    h.Sumw2()
    ratioplot = h.Clone()
    ratioplot.Sumw2()
    ratioplot.SetName(h.GetName() + '_ratio')
    ratioplot.Divide(h, ref, 1., 1., '')
    ratioplot.SetFillColor(0)
    ratioplot.SetFillStyle(0)
    ratioplot.SetMarkerColor(1)
    ratioplot.SetLineColor(kGray)
    ratioplot.SetMarkerStyle(24)
    ratioplot.SetMarkerSize(1.2)
    ratioplot.GetYaxis().SetTitleSize(0.10)
    ratioplot.GetXaxis().SetTitleSize(0.10)
    ratioplot.GetXaxis().SetLabelSize(0.10)
    ratioplot.GetYaxis().SetLabelSize(0.10)
    ratioplot.GetYaxis().SetRangeUser(-1.6, 3.7)
    ratioplot.GetYaxis().SetTitleOffset(0.7)
    ratioplot.GetYaxis().SetTitle('Data/MC')
    ratioplot.GetXaxis().SetTitle('Ring #' + str(r + 1) + ' [MeV]')
    ratioplot.Draw(drawopt)
    from ROOT import TLine
    # Dashed reference line at ratio == 1.
    nbins = h_data.GetNbinsX()
    xmin = h_data.GetXaxis().GetBinLowEdge(1)
    xmax = h_data.GetXaxis().GetBinLowEdge(nbins + 1)
    l1 = TLine(xmin, 1, xmax, 1)
    l1.SetLineColor(kRed)
    l1.SetLineStyle(2)
    l1.Draw()
    bot.Update()
    top.cd()
    h_mc.SetFillColor(basecolor)
    h_mc.SetLineWidth(1)
    h_mc.SetLineColor(basecolor)
    h_data.SetLineColor(kBlack)
    h_data.SetLineWidth(1)
    h_mc.GetYaxis().SetTitle('Count')
    h_mc.Draw()
    h_data.Draw('same')
    leg1 = TLegend(0.8, 0.70, 0.95, 0.95)
    setLegend1(leg1)
    leg1.AddEntry(h_mc, 'MC')
    leg1.AddEntry(h_data, 'Data')
    leg1.Draw()
    atlas_template(top)
    top.Update()
    canvas2.SaveAs(localpath + '/distribution_ring_' + str(r + 1) + '.pdf')
    figures['rings'].append(localpath + '/distribution_ring_' + str(r + 1) + '.pdf')
  """ Plot ringer mean shapes """
  # Fill one bin per ring with the mean of that ring's distribution.
  h_mean_data = TH1F('h_mean_data', '', 100, 0, 100)
  h_mean_mc = TH1F('h_mean_mc', '', 100, 0, 100)
  for bin in range(100):
    self.setDir('MonteCarlo')
    h_mc = sumAllRegions('rings/ring_' + str(bin))
    self.setDir('Data')
    h_data = sumAllRegions('rings/ring_' + str(bin))
    h_mean_data.SetBinContent(bin + 1, h_data.GetMean())
    h_mean_mc.SetBinContent(bin + 1, h_mc.GetMean())
  canvas3 = TCanvas('canvas3', 'canvas3', 2500, 1600)
  drawopt = 'pE1'
  canvas3.cd()
  top = TPad("pad_top", "This is the top pad", 0.0, ratio_size_as_fraction, 1.0, 1.0)
  top.SetBottomMargin(0.0)
  top.SetBottomMargin(0.06 / float(top.GetHNDC()))
  #top.SetTopMargin (0.04/float(top.GetHNDC()))
  top.SetRightMargin(0.05)
  top.SetLeftMargin(0.16)
  top.SetFillColor(0)
  top.Draw(drawopt)
  canvas3.cd()
  bot = TPad("pad_bot", "This is the bottom pad", 0.0, 0.0, 1.0, ratio_size_as_fraction)
  bot.SetBottomMargin(0.10 / float(bot.GetHNDC()))
  #bot.SetTopMargin (0.02/float(bot.GetHNDC()))
  bot.SetTopMargin(0.0)
  bot.SetRightMargin(0.05)
  bot.SetLeftMargin(0.16)
  bot.SetFillColor(0)
  bot.Draw(drawopt)
  gStyle.SetOptStat(000000)
  from ROOT import TH1, kGray
  divide = ""
  drawopt = 'pE1'
  bot.cd()
  # Data/MC ratio of the mean-profile histograms.
  ref = h_mean_mc.Clone()
  h = h_mean_data.Clone()
  ref.Sumw2()
  h.Sumw2()
  ratioplot = h.Clone()
  ratioplot.Sumw2()
  ratioplot.SetName(h.GetName() + '_ratio')
  ratioplot.Divide(h, ref, 1., 1., '')
  ratioplot.SetFillColor(0)
  ratioplot.SetFillStyle(0)
  ratioplot.SetMarkerColor(1)
  ratioplot.SetLineColor(kGray)
  ratioplot.SetMarkerStyle(24)
  ratioplot.SetMarkerSize(1.2)
  ratioplot.GetYaxis().SetTitleSize(0.10)
  ratioplot.GetXaxis().SetTitleSize(0.10)
  ratioplot.GetXaxis().SetLabelSize(0.10)
  ratioplot.GetYaxis().SetLabelSize(0.10)
  ratioplot.GetYaxis().SetRangeUser(-1.6, 3.7)
  ratioplot.GetYaxis().SetTitleOffset(0.7)
  ratioplot.GetYaxis().SetTitle('Data/MC')
  ratioplot.GetXaxis().SetTitle('Rings')
  ratioplot.Draw(drawopt)
  from ROOT import TLine
  nbins = h_mean_data.GetNbinsX()
  xmin = h_mean_data.GetXaxis().GetBinLowEdge(1)
  xmax = h_mean_data.GetXaxis().GetBinLowEdge(nbins + 1)
  l1 = TLine(xmin, 1, xmax, 1)
  l1.SetLineColor(kRed)
  l1.SetLineStyle(2)
  l1.Draw()
  bot.Update()
  top.cd()
  h_mean_mc.SetFillColor(basecolor)
  h_mean_mc.SetLineWidth(1)
  h_mean_mc.SetLineColor(basecolor)
  h_mean_data.SetLineColor(kBlack)
  h_mean_data.SetLineWidth(1)
  #h_mean_mc.Scale( 1./h_mean_mc.GetEntries() )
  #h_mean_data.Scale( 1./h_mean_data.GetEntries() )
  # Draw whichever histogram is taller first so both fit the frame.
  if h_mean_mc.GetMaximum() > h_mean_data.GetMaximum():
    ymin = h_mean_mc.GetMinimum()
    ymax = h_mean_mc.GetMaximum()
    h_mean_mc.Draw()
    h_mean_mc.GetYaxis().SetTitle('E[Ring] MeV')
    h_mean_data.Draw('same')
  else:
    ymin = h_mean_data.GetMinimum()
    ymax = h_mean_data.GetMaximum()
    h_mean_data.GetYaxis().SetTitle('E[Ring] MeV')
    h_mean_data.Draw()
    h_mean_mc.Draw('same')
    h_mean_data.Draw('same')

  # prepare ringer lines
  def gen_line_90(x, ymin, ymax, text):
    # Draw a vertical dashed line at `x` with a 90-degree rotated label;
    # returns both ROOT objects so the caller keeps them alive.
    from ROOT import TLine, TLatex
    ymax = 1.05 * ymax
    l = TLine(x, ymin, x, ymax)
    l.SetLineStyle(2)
    l.Draw()
    txt = TLatex()
    txt.SetTextFont(12)
    txt.SetTextAngle(90)
    txt.SetTextSize(0.04)
    txt.DrawLatex(x - 1, (ymax - ymin) / 2., text)
    return l, txt

  # Calorimeter layer boundaries in ring-index space.
  l_ps, t_ps = gen_line_90(8, ymin, ymax, 'presampler')
  l_em1, t_em1 = gen_line_90(72, ymin, ymax, 'EM.1')
  l_em2, t_em2 = gen_line_90(80, ymin, ymax, 'EM.2')
  l_em3, t_em3 = gen_line_90(88, ymin, ymax, 'EM.3')
  l_had1, t_had1 = gen_line_90(92, ymin, ymax, 'Had.1')
  l_had2, t_had2 = gen_line_90(96, ymin, ymax, 'Had.2')
  l_had3, t_had3 = gen_line_90(100, ymin, ymax, 'Had.3')
  leg1 = TLegend(0.8, 0.70, 0.95, 0.95)
  setLegend1(leg1)
  leg1.AddEntry(h_mean_mc, 'MC')
  leg1.AddEntry(h_mean_data, 'Data')
  leg1.Draw()
  atlas_template(top)
  top.Update()
  canvas3.SaveAs(localpath + '/ringer_profile.pdf')
  figures['ringer_profile'] = localpath + '/ringer_profile.pdf'
  """ Plot all NN distributions for each calo region """
  for algname in self._discrList:
    for etBinIdx in range(len(self._etBins) - 1):
      for etaBinIdx in range(len(self._etaBins) - 1):
        binningname = ('et%d_eta%d') % (etBinIdx, etaBinIdx)
        # Discriminant distributions are the X projection of the
        # discriminant-vs-mu 2D histograms.
        path = self._basepath + '/MonteCarlo/' + binningname
        h_mc = self.storeSvc().histogram(path + '/' + algname + '/discriminantVsMu').ProjectionX().Clone()
        path = self._basepath + '/Data/' + binningname
        h_data = self.storeSvc().histogram(path + '/' + algname + '/discriminantVsMu').ProjectionX().Clone()
        h_mc.Rebin(10)
        h_data.Rebin(10)
        h_mc.Scale(1. / h_mc.GetMaximum())
        h_data.Scale(1. / h_data.GetMaximum())
        canvas4 = TCanvas('canvas4', 'canvas4', 2500, 1600)
        drawopt = 'pE1'
        canvas4.cd()
        top = TPad("pad_top", "This is the top pad", 0.0, ratio_size_as_fraction, 1.0, 1.0)
        top.SetBottomMargin(0.0)
        top.SetBottomMargin(0.06 / float(top.GetHNDC()))
        #top.SetTopMargin (0.04/float(top.GetHNDC()))
        top.SetRightMargin(0.05)
        top.SetLeftMargin(0.16)
        top.SetFillColor(0)
        top.Draw(drawopt)
        canvas4.cd()
        bot = TPad("pad_bot", "This is the bottom pad", 0.0, 0.0, 1.0, ratio_size_as_fraction)
        bot.SetBottomMargin(0.10 / float(bot.GetHNDC()))
        bot.SetTopMargin(0.0)
        bot.SetRightMargin(0.05)
        bot.SetLeftMargin(0.16)
        bot.SetFillColor(0)
        bot.Draw(drawopt)
        gStyle.SetOptStat(000000)
        from ROOT import TH1, kGray
        divide = ""
        drawopt = 'pE1'
        bot.cd()
        ref = h_mc.Clone()
        h = h_data.Clone()
        ref.Sumw2()
        h.Sumw2()
        ratioplot = h.Clone()
        ratioplot.Sumw2()
        ratioplot.SetName(h.GetName() + '_ratio')
        ratioplot.Divide(h, ref, 1., 1., '')
        ratioplot.SetFillColor(0)
        ratioplot.SetFillStyle(0)
        ratioplot.SetMarkerColor(1)
        ratioplot.SetLineColor(kGray)
        ratioplot.SetMarkerStyle(24)
        ratioplot.SetMarkerSize(1.2)
        ratioplot.GetYaxis().SetTitleSize(0.10)
        ratioplot.GetXaxis().SetTitleSize(0.10)
        ratioplot.GetXaxis().SetLabelSize(0.10)
        ratioplot.GetYaxis().SetLabelSize(0.10)
        ratioplot.GetYaxis().SetRangeUser(-1.6, 3.7)
        ratioplot.GetYaxis().SetTitleOffset(0.7)
        ratioplot.GetYaxis().SetTitle('Data/MC')
        ratioplot.GetXaxis().SetTitle('Neural Network (Discriminant)')
        ratioplot.Draw(drawopt)
        from ROOT import TLine
        nbins = h_data.GetNbinsX()
        xmin = h_data.GetXaxis().GetBinLowEdge(1)
        xmax = h_data.GetXaxis().GetBinLowEdge(nbins + 1)
        l1 = TLine(xmin, 1, xmax, 1)
        l1.SetLineColor(kRed)
        l1.SetLineStyle(2)
        l1.Draw()
        bot.Update()
        top.cd()
        h_mc.SetFillColor(basecolor)
        h_mc.SetLineWidth(1)
        h_mc.SetLineColor(basecolor)
        h_data.SetLineColor(kBlack)
        h_data.SetLineWidth(1)
        h_mc.Scale(1. / h_mc.GetMaximum())
        h_data.Scale(1. / h_data.GetMaximum())
        h_mc.GetYaxis().SetTitle(('Counts (%s)') % (binningname.replace('_', ',')))
        h_mc.Draw()
        h_data.Draw('same')
        leg1 = TLegend(0.8, 0.70, 0.95, 0.95)
        setLegend1(leg1)
        leg1.AddEntry(h_mc, 'MC')
        leg1.AddEntry(h_data, 'Data')
        leg1.Draw()
        atlas_template(top)
        top.Update()
        canvas4.SaveAs(localpath + '/' + algname + '_rnnOutput_' + binningname + '.pdf')
        figures['rnnOutput'].append(localpath + '/' + algname + '_rnnOutput_' + binningname + '.pdf')
  #from RingerCore.tex.TexAPI import *
  from RingerCore.tex.BeamerAPI import BeamerTexReportTemplate1, BeamerSection, BeamerMultiFigureSlide, BeamerFigureSlide
  # Assemble the beamer report from the PDFs generated above.
  with BeamerTexReportTemplate1(theme='Berlin', _toPDF=True, title=pdftitle, outputFile=pdfoutput, font='structurebold'):
    with BeamerSection(name='Shower Shapes'):
      BeamerMultiFigureSlide(title='Shower Shapes (MC and Data)',
                             paths=[figures['shower_shapes']],
                             nDivWidth=1  # x
                             , nDivHeight=1  # y
                             , texts=None, fortran=False, usedHeight=0.8, usedWidth=1.1)
    with BeamerSection(name='Ringer Shapes Profile'):
      BeamerMultiFigureSlide(title='Ringer Profile (MC and Data)',
                             paths=[figures['ringer_profile']],
                             nDivWidth=1  # x
                             , nDivHeight=1  # y
                             , texts=None, fortran=False, usedHeight=0.8, usedWidth=1.1)
    with BeamerSection(name='Ringer Shapes'):
      # 100 ring figures -> 4 slides of 25 figures (5x5 grid) each.
      for s in range(4):
        paths1 = [path for path in figures['rings'][s * 25:s * 25 + 25]]
        BeamerMultiFigureSlide(title='Ringer Shapes (MC and Data)',
                               paths=paths1,
                               nDivWidth=5  # x
                               , nDivHeight=5  # y
                               , texts=None, fortran=False, usedHeight=0.8, usedWidth=1.1)
    for algname in self._discrList:
      with BeamerSection(name=('%s Neural Network Output') % (algname.replace('_', '\_'))):
        paths2 = []
        for etBinIdx in range(len(self._etBins) - 1):
          for etaBinIdx in range(len(self._etaBins) - 1):
            binningname = ('et%d_eta%d') % (etBinIdx, etaBinIdx)
            paths2.append(localpath + '/' + algname + '_rnnOutput_' + binningname + '.pdf')
        BeamerMultiFigureSlide(title=algname.replace('_', '\_'),
                               paths=paths2,
                               nDivWidth=len(self._etaBins)  # x
                               , nDivHeight=len(self._etBins)  # y
                               , texts=None, fortran=False, usedHeight=1.0, usedWidth=1.1)
  return StatusCode.SUCCESS
def initialize(self):
  """
  Build the TChain from the input file list, attach the event object that
  matches the configured dataframe enum, and create/configure all EDM
  containers plus the StoreGate service.

  Returns StatusCode.SUCCESS, or StatusCode.FATAL when the configured
  dataframe enum is not supported. Unreadable files and missing trees are
  skipped with a warning.
  """
  self._logger.info('Initializing EventReader...')
  ### Prepare to loop:
  self._t = ROOT.TChain()
  for inputFile in progressbar(self._fList, len(self._fList),
                               logger=self._logger,
                               prefix="Creating collection tree "):
    # Check if file exists
    self._f = ROOT.TFile.Open(inputFile, 'read')
    if not self._f or self._f.IsZombie():
      # BUGFIX: the message was written as 'Couldn' 't open file: %s',
      # whose implicit concatenation rendered as "Couldnt open file: %s".
      self._warning("Couldn't open file: %s", inputFile)
      continue
    # Inform user whether TTree exists, and which options are available:
    self._debug("Adding file: %s", inputFile)
    # Custom directory token: a '*' in the tree path is replaced by the
    # name of the first directory found inside the file.
    if '*' in self._treePath:
      dirname = self._f.GetListOfKeys()[0].GetName()
      treePath = self._treePath.replace('*', dirname)
    else:
      treePath = self._treePath
    obj = self._f.Get(treePath)
    if not obj:
      # Tree not found: dump the file content to help the user, then skip.
      self._warning("Couldn't retrieve TTree (%s)!", treePath)
      self._info("File available info:")
      self._f.ReadAll()
      self._f.ReadKeys()
      self._f.ls()
      continue
    elif not isinstance(obj, ROOT.TTree):
      self._fatal("%s is not an instance of TTree!", treePath, ValueError)
    self._t.Add(inputFile + '/' + treePath)
  # Turn all branches off; the containers re-enable only what they need.
  self._t.SetBranchStatus("*", False)
  # The event object holds the address of the required branches.
  if self._dataframe is DataframeEnum.SkimmedNtuple:
    self._event = ROOT.SkimmedNtuple()
  elif self._dataframe is DataframeEnum.PhysVal:
    self._event = ROOT.RingerPhysVal()
  else:
    return StatusCode.FATAL
  # Ready to retrieve the total number of events
  self._t.GetEntry(0)
  ## Allocating memory for the number of entries
  self._entries = self._t.GetEntries()
  self._logger.info("Creating containers...")
  # Allocating containers
  from TrigEgammaDevelopments.dataframe.Electron import Electron
  from TrigEgammaDevelopments.dataframe.FastCalo import FastCalo
  from TrigEgammaDevelopments.dataframe.EventInfo import EventInfo
  from TrigEgammaDevelopments.dataframe.MonteCarlo import MonteCarlo
  # Initialize the base of this container
  self._containersSvc = {
      'Electron': Electron(),
      'FastCalo': FastCalo(),
      'EventInfo': EventInfo(),
      'MonteCarlo': MonteCarlo(),
  }
  # Force the event id number for this event looper.
  self._containersSvc['EventInfo'].setId(self.id())
  # Configure all EDMs needed.
  for key, edm in self._containersSvc.iteritems():
    # Shared properties every container needs to read the tree.
    edm.dataframe = self._dataframe
    edm.tree = self._t
    edm.level = self._level
    edm.event = self._event
    edm.setSvc(self._containersSvc)
    # NOTE(review): a failed container is only warned about and stays
    # registered (the original comment claimed it should be removed);
    # behavior preserved.
    if edm.initialize().isFailure():
      self._logger.warning('Impossible to create the EDM: %s', key)
  # Create the StoreGate service (only when the client did not supply one).
  if not self._storegateSvc:
    self._logger.info("Creating StoreGate...")
    from RingerCore import StoreGate
    self._storegateSvc = StoreGate(self._ofile)
  else:
    self._logger.info(
        'The StoraGate was created for ohter service. Using the service setted by client.'
    )
  return StatusCode.SUCCESS
def loop(self, **kw):
  """
  Produce the full tuning-monitoring output: for every benchmark and every
  hidden-layer (neuron) configuration, collect the best/worst sorts and
  inits, generate the six standard figures, build the performance tables
  and (optionally) a beamer PDF report.

  Keyword arguments (all optional):
    output       : base name of the output folder/report (default 'Mon')
    tuningReport : tuning report name (default 'tuningReport')
    doBeamer     : build the beamer presentation (default True)
    shortSlides  : tables only, skip figure slides (default False)
    debug        : stop after the first neuron/benchmark (default False)
    overwrite    : proceed even if the output path exists (default False)
  """
  import gc
  output       = kw.pop('output'      , 'Mon'          )
  tuningReport = kw.pop('tuningReport', 'tuningReport' )
  doBeamer     = kw.pop('doBeamer'    , True           )
  shortSlides  = kw.pop('shortSlides' , False          )
  debug        = kw.pop('debug'       , False          )
  overwrite    = kw.pop('overwrite'   , False          )
  # Output folder is suffixed with the (et, eta) bin of the first benchmark.
  basepath=output
  basepath+=('_et%d_eta%d')%(self._infoObjs[0].etbin(),self._infoObjs[0].etabin())
  if not overwrite and os.path.isdir( basepath ):
    self._logger.warning("Monitoring output path already exists!")
    return
  if shortSlides:
    self._logger.warning('Short slides enabled! Doing only tables...')
  if debug:
    self._logger.warning('Debug mode activated!')
  wantedPlotNames = {'allBestTstSorts','allBestOpSorts','allWorstTstSorts', 'allWorstOpSorts',\
                     'allBestTstNeurons','allBestOpNeurons', 'allWorstTstNeurons', 'allWorstOpNeurons'}
  perfBenchmarks = dict()
  pathBenchmarks = dict()
  from PlotHolder import PlotHolder
  from PlotHelper import plot_4c, plot_rocs, plot_nnoutput
  from TuningMonitoringInfo import MonitoringPerfInfo
  #Loop over benchmarks
  for infoObj in self._infoObjs:
    #Initialize all plots
    plotObjects = dict()
    perfObjects = dict()
    infoObjects = dict()
    pathObjects = dict()
    #Init PlotsHolder
    for plotname in wantedPlotNames:
      if 'Sorts' in plotname:
        plotObjects[plotname] = PlotHolder(label = 'Sort')
      else:
        plotObjects[plotname] = PlotHolder(label = 'Neuron')
    #Retrieve benchmark name
    benchmarkName = infoObj.name()
    #Retrieve reference name
    reference = infoObj.reference()
    #summary
    csummary = infoObj.summary()
    #benchmark object
    cbenchmark = infoObj.rawBenchmark()
    # reference value
    refVal = infoObj.rawBenchmark()['refVal']
    #Eta bin
    etabin = infoObj.etabin()
    #Et bin
    etbin = infoObj.etbin()
    self._logger.info(('Start loop over the benchmark: %s and etaBin = %d etBin = %d')%(benchmarkName,etabin, etbin) )
    import copy
    # Common arguments forwarded to every plot helper below.
    args = dict()
    args['reference'] = reference
    args['refVal'] = refVal
    args['eps'] = cbenchmark['eps']
    self._logger.info('Creating plots...')
    # Creating plots
    for neuron in progressbar(infoObj.neuronBounds(), len(infoObj.neuronBounds()),
                              'Loading : ', 60, False, logger=self._logger):
      # Figure path location
      currentPath = ('%s/figures/%s/%s') % (basepath,benchmarkName,'neuron_'+str(neuron))
      neuronName = 'config_'+str(neuron).zfill(3)
      # Create folder to store all plot objects
      mkdir_p(currentPath)
      #Clear all hold plots stored
      plotObjects['allBestTstSorts'].clear()
      plotObjects['allBestOpSorts'].clear()
      infoObjects['allInfoOpBest_'+neuronName] = list()
      #plotObjects['allWorstTstSorts'].clear()
      #plotObjects['allWorstOpSorts'].clear()
      for sort in infoObj.sortBounds(neuron):
        sortName = 'sort_'+str(sort).zfill(3)
        #Init bounds
        initBounds = infoObj.initBounds(neuron,sort)
        #Create path list from initBound list
        initPaths = [('%s/%s/%s/init_%s')%(benchmarkName,neuronName,sortName,init) for init in initBounds]
        self._logger.debug('Creating init plots into the path: %s, (neuron_%s,sort_%s)', \
                           benchmarkName, neuron, sort)
        obj = PlotHolder(label = 'Init')
        try:
          #Create plots holder class (Helper), store all inits
          obj.retrieve(self._rootObj, initPaths)
        except RuntimeError:
          self._logger.fatal('Can not create plot holder object')
        #Hold all inits from current sort
        obj.set_index_correction(initBounds)
        # Pick the best/worst init of this sort for the test set ...
        obj.set_best_index(  csummary[neuronName][sortName]['infoTstBest']['init'] )
        obj.set_worst_index( csummary[neuronName][sortName]['infoTstWorst']['init'] )
        plotObjects['allBestTstSorts'].append( copy.deepcopy(obj.get_best() ) )
        # ... and for the operation set.
        obj.set_best_index(  csummary[neuronName][sortName]['infoOpBest']['init'] )
        obj.set_worst_index( csummary[neuronName][sortName]['infoOpWorst']['init'] )
        plotObjects['allBestOpSorts'].append( copy.deepcopy(obj.get_best() ) )
        #plotObjects['allWorstTstSorts'].append( copy.deepcopy(tstObj.getBest() )
        #plotObjects['allWorstOpSorts'].append( copy.deepcopy(opObj.getBest() )
        infoObjects['allInfoOpBest_'+neuronName].append( copy.deepcopy(csummary[neuronName][sortName]['infoOpBest']) )
        #Release memory
        del obj
      #Loop over sorts
      gc.collect()
      plotObjects['allBestTstSorts'].set_index_correction( infoObj.sortBounds(neuron) )
      plotObjects['allBestOpSorts'].set_index_correction( infoObj.sortBounds(neuron) )
      #plotObjects['allWorstTstSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )
      #plotObjects['allWorstOpSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )
      # Best and worst sorts for this neuron configuration
      plotObjects['allBestTstSorts'].set_best_index(  csummary[neuronName]['infoTstBest']['sort'] )
      plotObjects['allBestTstSorts'].set_worst_index( csummary[neuronName]['infoTstWorst']['sort'] )
      plotObjects['allBestOpSorts'].set_best_index(   csummary[neuronName]['infoOpBest']['sort'] )
      plotObjects['allBestOpSorts'].set_worst_index(  csummary[neuronName]['infoOpWorst']['sort'] )
      # Hold the information from the best and worst discriminator for this neuron
      infoObjects['infoOpBest_'+neuronName] = copy.deepcopy(csummary[neuronName]['infoOpBest'])
      infoObjects['infoOpWorst_'+neuronName] = copy.deepcopy(csummary[neuronName]['infoOpWorst'])
      # Best and worst neuron sort for this configuration
      plotObjects['allBestTstNeurons'].append( copy.deepcopy(plotObjects['allBestTstSorts'].get_best() ))
      plotObjects['allBestOpNeurons'].append( copy.deepcopy(plotObjects['allBestOpSorts'].get_best() ))
      plotObjects['allWorstTstNeurons'].append(copy.deepcopy(plotObjects['allBestTstSorts'].get_worst() ))
      plotObjects['allWorstOpNeurons'].append( copy.deepcopy(plotObjects['allBestOpSorts'].get_worst() ))
      # Create perf (tables) Objects for test and operation (Table)
      perfObjects[neuronName] = MonitoringPerfInfo(benchmarkName, reference,
                                                   csummary[neuronName]['summaryInfoTst'],
                                                   csummary[neuronName]['infoOpBest'],
                                                   cbenchmark)
      # Debug information
      self._logger.debug(('Crossval indexs: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)')%\
            (plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].get_best()['bestInit'],
             plotObjects['allBestTstSorts'].worst, plotObjects['allBestTstSorts'].get_worst()['bestInit']))
      self._logger.debug(('Operation indexs: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)')%\
            (plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].get_best()['bestInit'],
             plotObjects['allBestOpSorts'].worst, plotObjects['allBestOpSorts'].get_worst()['bestInit']))
      # Figure 1: Plot all validation/test curves for all crossval sorts tested during
      # the training. The best sort will be painted with black and the worst sort will
      # be on red color. There is a label that will be draw into the figure to show
      # the current location (neuron, sort, init) of the best and the worst network.
      args['label'] = ('#splitline{#splitline{Total sorts: %d}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                      (plotObjects['allBestTstSorts'].size(),etabin, etbin, plotObjects['allBestTstSorts'].best, \
                       plotObjects['allBestTstSorts'].get_best()['bestInit'], plotObjects['allBestTstSorts'].worst,\
                       plotObjects['allBestTstSorts'].get_worst()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_val')%(currentPath,benchmarkName,neuron)
      args['set'] = 'val'
      args['operation'] = False
      args['paintListIdx'] = [plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].worst]
      pname1 = plot_4c(plotObjects['allBestTstSorts'], args)
      # Figure 2: Plot all validation/test curves for all crossval sorts tested during
      # the training. The best sort will be painted with black and the worst sort will
      # be on red color. But, here the painted curves represented the best and the worst
      # curve from the operation dataset. In other words, we pass all events into the
      # network and get the efficiencis than we choose the best operation and the worst
      # operation network and paint the validation curve who represent these sorts.
      # There is a label that will be draw into the figure to show
      # the current location (neuron, sort, init) of the best and the worst network.
      args['label'] = ('#splitline{#splitline{Total sorts: %d (operation)}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                      (plotObjects['allBestOpSorts'].size(),etabin, etbin, plotObjects['allBestOpSorts'].best, \
                       plotObjects['allBestOpSorts'].get_best()['bestInit'], plotObjects['allBestOpSorts'].worst,\
                       plotObjects['allBestOpSorts'].get_worst()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_op')%(currentPath,benchmarkName,neuron)
      args['set'] = 'val'
      args['operation'] = True
      args['paintListIdx'] = [plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].worst]
      pname2 = plot_4c(plotObjects['allBestOpSorts'], args)
      # Figure 3: This figure show us in deteails the best operation network for the current hidden
      # layer and benchmark analysis. Depend on the benchmark, we draw lines who represents the
      # stops for each curve. The current neuron will be the last position of the plotObjects
      splotObject = PlotHolder()
      args['label'] = ('#splitline{#splitline{Best network neuron: %d}{etaBin: %d, etBin: %d}}'+\
                       '{#splitline{sBestIdx: %d iBestIdx: %d}{}}') % \
                      (neuron,etabin, etbin, plotObjects['allBestOpSorts'].best,
                       plotObjects['allBestOpSorts'].get_best()['bestInit'])
      args['cname'] = ('%s/plot_%s_neuron_%s_best_op')%(currentPath,benchmarkName,neuron)
      args['set'] = 'val'
      args['operation'] = True
      splotObject.append( plotObjects['allBestOpNeurons'][-1] )
      pname3 = plot_4c(splotObject, args)
      # Figure 4: Here, we have a plot of the discriminator output for all dataset. Black histogram
      # represents the signal and the red onces represent the background. TODO: Apply this outputs
      # using the feedfoward manual method to generate the network outputs and create the histograms.
      args['cname'] = ('%s/plot_%s_neuron_%s_best_op_output')%(currentPath,benchmarkName,neuron)
      args['nsignal'] = self._data[0].shape[0]
      args['nbackground'] = self._data[1].shape[0]
      sbest = plotObjects['allBestOpNeurons'][-1]['bestSort']
      args['cut'] = csummary[neuronName]['sort_'+str(sbest).zfill(3)]['infoOpBest']['cut']
      args['rocname'] = 'roc_op'
      pname4 = plot_nnoutput(splotObject,args)
      # Figure 5: The receive operation test curve for all sorts using the test dataset as base.
      # Here, we will draw the current tunnel and ref value used to set the discriminator threshold
      # when the bechmark are Pd or Pf case. When we use the SP case, this tunnel will not be ploted.
      # The black curve represents the best sort and the red onces the worst sort. TODO: Put the SP
      # point for the best and worst when the benchmark case is SP.
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_tst')%(currentPath,benchmarkName,neuron)
      args['set'] = 'tst'
      args['paintListIdx'] = [plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].worst]
      pname5 = plot_rocs(plotObjects['allBestTstSorts'], args)
      # Figure 6: The receive operation curve for all sorts using the operation dataset (train+test) as base.
      # Here, we will draw the current tunnel and ref value used to set the discriminator threshold
      # when the bechmark are Pd or Pf case. When we use the SP case, this tunnel will not be ploted.
      # The black curve represents the best sort and the red onces the worst sort. TODO: Put the SP
      # point for the best and worst when the benchmark case is SP.
      args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_op')%(currentPath,benchmarkName,neuron)
      args['set'] = 'op'
      args['paintListIdx'] = [plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].worst]
      pname6 = plot_rocs(plotObjects['allBestOpSorts'], args)
      # Map names for beamer, if you add a plot, you must add into
      # the path objects holder
      pathObjects['neuron_'+str(neuron)+'_sorts_val'] = pname1
      pathObjects['neuron_'+str(neuron)+'_sort_op'] = pname2
      pathObjects['neuron_'+str(neuron)+'_best_op'] = pname3
      pathObjects['neuron_'+str(neuron)+'_best_op_output'] = pname4
      pathObjects['neuron_'+str(neuron)+'_sorts_roc_tst'] = pname5
      pathObjects['neuron_'+str(neuron)+'_sorts_roc_op'] = pname6
      if debug: break
    #Loop over neurons
    #External
    pathBenchmarks[benchmarkName] = pathObjects
    perfBenchmarks[benchmarkName] = perfObjects
    #Release memory
    for xname in plotObjects.keys():
      del plotObjects[xname]
    gc.collect()
    #if debug: break
  #Loop over benchmark
  #Start beamer presentation
  if doBeamer:
    from BeamerMonReport import BeamerMonReport
    from BeamerTemplates import BeamerPerfTables, BeamerFigure, BeamerBlocks
    #Eta bin
    etabin = self._infoObjs[0].etabin()
    #Et bin
    etbin = self._infoObjs[0].etbin()
    #Create the beamer manager
    reportname = ('%s_et%d_eta%d')%(output,etbin,etabin)
    beamer = BeamerMonReport(basepath+'/'+reportname, title = ('Tuning Report (et=%d, eta=%d)')%(etbin,etabin) )
    neuronBounds = self._infoObjs[0].neuronBounds()
    for neuron in neuronBounds:
      #Make the tables for crossvalidation
      ptableCross = BeamerPerfTables(frametitle= ['Neuron '+str(neuron)+': Cross Validation Performance',
                                                  'Neuron '+str(neuron)+": Operation Best Network"],
                                     caption=['Efficiencies from each benchmark.',
                                              'Efficiencies for the best operation network'])
      block = BeamerBlocks('Neuron '+str(neuron)+' Analysis',
                           [('All sorts (validation)','All sorts evolution are ploted, each sort represents the best init;'),
                            ('All sorts (operation)', 'All sorts evolution only for operation set;'),
                            ('Best operation', 'Detailed analysis from the best sort discriminator.'),
                            ('Tables','Cross validation performance')])
      if not shortSlides:
        block.tolatex( beamer.file() )
      for info in self._infoObjs:
        #If we produce a short presentation, we do not draw all plots
        if not shortSlides:
          bname = info.name().replace('OperationPoint_','')
          fig1 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sorts_val'].replace(basepath+'/',''), 0.7,
                               frametitle=bname+', Neuron '+str(neuron)+': All sorts (validation)')
          fig2 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sorts_roc_tst'].replace(basepath+'/',''), 0.8,
                               frametitle=bname+', Neuron '+str(neuron)+': All ROC sorts (validation)')
          fig3 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sort_op'].replace(basepath+'/',''), 0.7,
                               frametitle=bname+', Neuron '+str(neuron)+': All sorts (operation)')
          fig4 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_sorts_roc_op'].replace(basepath+'/',''), 0.8,
                               frametitle=bname+', Neuron '+str(neuron)+': All ROC sorts (operation)')
          fig5 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_best_op'].replace(basepath+'/',''), 0.7,
                               frametitle=bname+', Neuron '+str(neuron)+': Best Network')
          fig6 = BeamerFigure( pathBenchmarks[info.name()]['neuron_'+str(neuron)+'_best_op_output'].replace(basepath+'/',''), 0.8,
                               frametitle=bname+', Neuron '+str(neuron)+': Best Network output')
          #Draw figures into the tex file
          fig1.tolatex( beamer.file() )
          fig2.tolatex( beamer.file() )
          fig3.tolatex( beamer.file() )
          fig4.tolatex( beamer.file() )
          fig5.tolatex( beamer.file() )
          fig6.tolatex( beamer.file() )
        #Concatenate performance table, each line will be a benchmark
        #e.g: det, sp and fa
        ptableCross.add( perfBenchmarks[info.name()]['config_'+str(neuron).zfill(3)] )
        #if debug: break
      ptableCross.tolatex( beamer.file() )# internal switch is false to true: test
      ptableCross.tolatex( beamer.file() )# internal switch is true to false: operation
      if debug: break
    beamer.close()
  self._logger.info('Done! ')
def loop(self, **kw):
    """
    Produce the cross-validation monitoring products for every benchmark in
    self._infoObjs: per-neuron/per-sort figures (plot_4c, plot_rocs,
    plot_nnoutput), performance tables, a saved 'perfBounds' summary file
    and, optionally, a beamer (LaTeX) report.

    Keyword arguments (all consumed from **kw):
      - output ['Mon']: base name for the output directory and report.
      - tuningReport ['tuningReport']: NOTE(review): popped but never used in
        this method — confirm whether it can be removed.
      - doBeamer [True]: whether to build the beamer presentation.
      - shortSlides [False]: when True only the performance tables are drawn.
      - debug [False]: stop the heavy loops after their first iteration.
      - overwrite [False]: when False and the output path exists, abort.
      - choicesfile [None]: .mat file (read via scipy.io.loadmat) holding a
        pre-selected neuron configuration per benchmark/bin; when given only
        that configuration is processed.
    """
    from scipy.io import loadmat
    import gc
    output       = kw.pop('output',       'Mon'          )
    tuningReport = kw.pop('tuningReport', 'tuningReport' )
    doBeamer     = kw.pop('doBeamer',     True           )
    shortSlides  = kw.pop('shortSlides',  False          )
    debug        = kw.pop('debug',        False          )
    overwrite    = kw.pop('overwrite',    False          )
    choicesfile  = kw.pop('choicesfile',  None           )
    # Output directory is suffixed with the (et, eta) bin of the first info
    # object; all info objects are assumed to share the same bin.
    basepath = output
    basepath += ('_et%d_eta%d') % (self._infoObjs[0].etbinidx(),
                                   self._infoObjs[0].etabinidx())
    if choicesfile:
        choices = loadmat(choicesfile)
    if not overwrite and os.path.isdir(basepath):
        self._logger.warning("Monitoring output path already exists!")
        return
    if shortSlides:
        self._logger.warning('Short slides enabled! Doing only tables...')
    if debug:
        self._logger.warning('Debug mode activated!')
    # Plot collections kept per benchmark; 'Sorts' holders are rebuilt for
    # every neuron, 'Neurons' holders accumulate one entry per neuron.
    wantedPlotNames = {'allBestTstSorts', 'allBestOpSorts', 'allWorstTstSorts', 'allWorstOpSorts', \
                       'allBestTstNeurons', 'allBestOpNeurons', 'allWorstTstNeurons', 'allWorstOpNeurons'}
    perfBenchmarks = dict()
    pathBenchmarks = dict()
    from PlotHolder import PlotHolder
    from PlotHelper import plot_4c, plot_rocs, plot_nnoutput
    from TuningMonitoringInfo import MonitoringPerfInfo
    # Loop over benchmarks
    for infoObj in self._infoObjs:
        # Initialize all plots
        plotObjects = dict()
        perfObjects = dict()
        infoObjects = dict()
        pathObjects = dict()
        # Init PlotsHolder
        for plotname in wantedPlotNames:
            if 'Sorts' in plotname:
                plotObjects[plotname] = PlotHolder(label='Sort')
            else:
                plotObjects[plotname] = PlotHolder(label='Neuron')
        # keyboard()
        # Retrieve benchmark name
        benchmarkName = infoObj.name()
        # Retrieve reference name
        reference = infoObj.reference()
        # summary
        csummary = infoObj.summary()
        # benchmark object
        cbenchmark = infoObj.rawBenchmark()
        # etBin = infoObj.etbin()
        # reference value
        refVal = infoObj.rawBenchmark()['refVal']
        # Eta bin
        etabinidx = infoObj.etabinidx()
        # Et bin
        etbinidx = infoObj.etbinidx()
        # Eta bin
        etabin = infoObj.etabin()
        # Et bin
        etbin = infoObj.etbin()
        self._logger.info(('Start loop over the benchmark: %s and etaBin = %d etBin = %d') %
                          (benchmarkName, etabinidx, etbinidx))
        import copy
        # Common arguments forwarded to every plot helper below.
        args = dict()
        args['reference'] = reference
        args['refVal'] = refVal
        args['eps'] = cbenchmark['eps']
        self._logger.info('Creating plots...')
        # Creating plots
        for neuron in progressbar(infoObj.neuronBounds(), len(infoObj.neuronBounds()),
                                  'Loading : ', 60, False, logger=self._logger):
            if choicesfile:
                # Override the scanned neuron with the pre-selected choice for
                # this benchmark/bin (assumes .mat layout — TODO confirm).
                neuron = choices['choices'][infoObj.name().split('_')[-1]][0][0][etbinidx][etabinidx]
            # Figure path location
            currentPath = ('%s/figures/%s/%s') % (basepath, benchmarkName, 'neuron_' + str(neuron))
            neuronName = 'config_' + str(neuron).zfill(3)
            # Create folder to store all plot objects
            mkdir_p(currentPath)
            # Clear all hold plots stored
            plotObjects['allBestTstSorts'].clear()
            plotObjects['allBestOpSorts'].clear()
            infoObjects['allInfoOpBest_' + neuronName] = list()
            #plotObjects['allWorstTstSorts'].clear()
            #plotObjects['allWorstOpSorts'].clear()
            for sort in infoObj.sortBounds(neuron):
                sortName = 'sort_' + str(sort).zfill(3)
                # Init bounds
                initBounds = infoObj.initBounds(neuron, sort)
                # Create path list from initBound list
                initPaths = [('%s/%s/%s/init_%s') % (benchmarkName, neuronName, sortName, init)
                             for init in initBounds]
                self._logger.debug('Creating init plots into the path: %s, (neuron_%s,sort_%s)', \
                                   benchmarkName, neuron, sort)
                obj = PlotHolder(label='Init')
                try:
                    # Create plots holder class (Helper), store all inits
                    obj.retrieve(self._rootObj, initPaths)
                except RuntimeError:
                    self._logger.fatal('Can not create plot holder object')
                # Hold all inits from current sort
                obj.set_index_correction(initBounds)
                # Best/worst init for the test set, then for the operation set;
                # only the best of each is kept (deep-copied) per sort.
                obj.set_best_index(csummary[neuronName][sortName]['infoTstBest']['init'])
                obj.set_worst_index(csummary[neuronName][sortName]['infoTstWorst']['init'])
                plotObjects['allBestTstSorts'].append(copy.deepcopy(obj.get_best()))
                obj.set_best_index(csummary[neuronName][sortName]['infoOpBest']['init'])
                obj.set_worst_index(csummary[neuronName][sortName]['infoOpWorst']['init'])
                plotObjects['allBestOpSorts'].append(copy.deepcopy(obj.get_best()))
                #plotObjects['allWorstTstSorts'].append( copy.deepcopy(tstObj.getBest() )
                #plotObjects['allWorstOpSorts'].append( copy.deepcopy(opObj.getBest() )
                infoObjects['allInfoOpBest_' + neuronName].append(
                    copy.deepcopy(csummary[neuronName][sortName]['infoOpBest']))
                # Release memory
                del obj
            # Loop over sorts
            gc.collect()
            plotObjects['allBestTstSorts'].set_index_correction(infoObj.sortBounds(neuron))
            plotObjects['allBestOpSorts'].set_index_correction(infoObj.sortBounds(neuron))
            #plotObjects['allWorstTstSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )
            #plotObjects['allWorstOpSorts'].setIdxCorrection( infoObj.sortBounds(neuron) )
            # Best and worst sorts for this neuron configuration
            plotObjects['allBestTstSorts'].set_best_index(csummary[neuronName]['infoTstBest']['sort'])
            plotObjects['allBestTstSorts'].set_worst_index(csummary[neuronName]['infoTstWorst']['sort'])
            plotObjects['allBestOpSorts'].set_best_index(csummary[neuronName]['infoOpBest']['sort'])
            plotObjects['allBestOpSorts'].set_worst_index(csummary[neuronName]['infoOpWorst']['sort'])
            # Hold the information from the best and worst discriminator for this neuron
            infoObjects['infoOpBest_' + neuronName] = copy.deepcopy(csummary[neuronName]['infoOpBest'])
            infoObjects['infoOpWorst_' + neuronName] = copy.deepcopy(csummary[neuronName]['infoOpWorst'])
            # Best and worst neuron sort for this configuration
            plotObjects['allBestTstNeurons'].append(copy.deepcopy(plotObjects['allBestTstSorts'].get_best()))
            plotObjects['allBestOpNeurons'].append(copy.deepcopy(plotObjects['allBestOpSorts'].get_best()))
            plotObjects['allWorstTstNeurons'].append(copy.deepcopy(plotObjects['allBestTstSorts'].get_worst()))
            plotObjects['allWorstOpNeurons'].append(copy.deepcopy(plotObjects['allBestOpSorts'].get_worst()))
            # Create perf (tables) Objects for test and operation (Table)
            perfObjects[neuronName] = MonitoringPerfInfo(benchmarkName, reference,
                                                         csummary[neuronName]['summaryInfoTst'],
                                                         csummary[neuronName]['infoOpBest'],
                                                         cbenchmark)
            # Debug information
            self._logger.debug(('Crossval indexes: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)') % \
                (plotObjects['allBestTstSorts'].best, plotObjects['allBestTstSorts'].get_best()['bestInit'],
                 plotObjects['allBestTstSorts'].worst, plotObjects['allBestTstSorts'].get_worst()['bestInit']))
            self._logger.debug(('Operation indexes: (bestSort = %d, bestInit = %d) (worstSort = %d, bestInit = %d)') % \
                (plotObjects['allBestOpSorts'].best, plotObjects['allBestOpSorts'].get_best()['bestInit'],
                 plotObjects['allBestOpSorts'].worst, plotObjects['allBestOpSorts'].get_worst()['bestInit']))
            # Figure 1: Plot all validation/test curves for all crossval sorts tested during
            # the training. The best sort will be painted with black and the worst sort will
            # be on red color. There is a label that will be drawn into the figure to show
            # the current location (neuron, sort, init) of the best and the worst network.
            args['label'] = ('#splitline{#splitline{Total sorts: %d}{etaBin: %d, etBin: %d}}' + \
                             '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                (plotObjects['allBestTstSorts'].size(), etabinidx, etbinidx, plotObjects['allBestTstSorts'].best, \
                 plotObjects['allBestTstSorts'].get_best()['bestInit'], plotObjects['allBestTstSorts'].worst, \
                 plotObjects['allBestTstSorts'].get_worst()['bestInit'])
            args['cname'] = ('%s/plot_%s_neuron_%s_sorts_val') % (currentPath, benchmarkName, neuron)
            args['set'] = 'val'
            args['operation'] = False
            args['paintListIdx'] = [plotObjects['allBestTstSorts'].best,
                                    plotObjects['allBestTstSorts'].worst]
            pname1 = plot_4c(plotObjects['allBestTstSorts'], args)
            # Figure 2: Plot all validation/test curves for all crossval sorts tested during
            # the training. The best sort will be painted with black and the worst sort will
            # be on red color. But, here the painted curves represent the best and the worst
            # curve from the operation dataset. In other words, we pass all events into the
            # network and get the efficiencies, then we choose the best operation and the worst
            # operation network and paint the validation curve which represents these sorts.
            # There is a label that will be drawn into the figure to show
            # the current location (neuron, sort, init) of the best and the worst network.
            args['label'] = ('#splitline{#splitline{Total sorts: %d (operation)}{etaBin: %d, etBin: %d}}' + \
                             '{#splitline{sBestIdx: %d iBestIdx: %d}{sWorstIdx: %d iBestIdx: %d}}') % \
                (plotObjects['allBestOpSorts'].size(), etabinidx, etbinidx, plotObjects['allBestOpSorts'].best, \
                 plotObjects['allBestOpSorts'].get_best()['bestInit'], plotObjects['allBestOpSorts'].worst, \
                 plotObjects['allBestOpSorts'].get_worst()['bestInit'])
            args['cname'] = ('%s/plot_%s_neuron_%s_sorts_op') % (currentPath, benchmarkName, neuron)
            args['set'] = 'val'
            args['operation'] = True
            args['paintListIdx'] = [plotObjects['allBestOpSorts'].best,
                                    plotObjects['allBestOpSorts'].worst]
            pname2 = plot_4c(plotObjects['allBestOpSorts'], args)
            # Figure 3: This figure shows us in details the best operation network for the current hidden
            # layer and benchmark analysis. Depending on the benchmark, we draw lines which represent the
            # stops for each curve. The current neuron will be the last position of the plotObjects
            splotObject = PlotHolder()
            args['label'] = ('#splitline{#splitline{Best network neuron: %d}{etaBin: %d, etBin: %d}}' + \
                             '{#splitline{sBestIdx: %d iBestIdx: %d}{}}') % \
                (neuron, etabinidx, etbinidx, plotObjects['allBestOpSorts'].best,
                 plotObjects['allBestOpSorts'].get_best()['bestInit'])
            args['cname'] = ('%s/plot_%s_neuron_%s_best_op') % (currentPath, benchmarkName, neuron)
            args['set'] = 'val'
            args['operation'] = True
            splotObject.append(plotObjects['allBestOpNeurons'][-1])
            pname3 = plot_4c(splotObject, args)
            # Figure 4: Here, we have a plot of the discriminator output for all datasets. Black histogram
            # represents the signal and the red ones represent the background. TODO: Apply these outputs
            # using the feedforward manual method to generate the network outputs and create the histograms.
            args['cname'] = ('%s/plot_%s_neuron_%s_best_op_output') % (currentPath, benchmarkName, neuron)
            args['nsignal'] = self._data[0].shape[0]
            args['nbackground'] = self._data[1].shape[0]
            sbest = plotObjects['allBestOpNeurons'][-1]['bestSort']
            args['cut'] = csummary[neuronName]['sort_' + str(sbest).zfill(3)]['infoOpBest']['cut']
            args['rocname'] = 'roc_operation'
            pname4 = plot_nnoutput(splotObject, args)
            # Figure 5: The receiver operating test curve for all sorts using the test dataset as base.
            # Here, we will draw the current tunnel and ref value used to set the discriminator threshold
            # when the benchmark is the Pd or Pf case. When we use the SP case, this tunnel will not be plotted.
            # The black curve represents the best sort and the red ones the worst sort. TODO: Put the SP
            # point for the best and worst when the benchmark case is SP.
            args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_tst') % (currentPath, benchmarkName, neuron)
            args['set'] = 'tst'
            args['paintListIdx'] = [plotObjects['allBestTstSorts'].best,
                                    plotObjects['allBestTstSorts'].worst]
            pname5 = plot_rocs(plotObjects['allBestTstSorts'], args)
            # Figure 6: The receiver operating curve for all sorts using the operation dataset (train+test) as base.
            # Here, we will draw the current tunnel and ref value used to set the discriminator threshold
            # when the benchmark is the Pd or Pf case. When we use the SP case, this tunnel will not be plotted.
            # The black curve represents the best sort and the red ones the worst sort. TODO: Put the SP
            # point for the best and worst when the benchmark case is SP.
            args['cname'] = ('%s/plot_%s_neuron_%s_sorts_roc_op') % (currentPath, benchmarkName, neuron)
            args['set'] = 'operation'
            args['paintListIdx'] = [plotObjects['allBestOpSorts'].best,
                                    plotObjects['allBestOpSorts'].worst]
            pname6 = plot_rocs(plotObjects['allBestOpSorts'], args)
            # Map names for beamer, if you add a plot, you must add into
            # the path objects holder
            pathObjects['neuron_' + str(neuron) + '_sorts_val'] = pname1
            pathObjects['neuron_' + str(neuron) + '_sort_op'] = pname2
            pathObjects['neuron_' + str(neuron) + '_best_op'] = pname3
            pathObjects['neuron_' + str(neuron) + '_best_op_output'] = pname4
            pathObjects['neuron_' + str(neuron) + '_sorts_roc_tst'] = pname5
            pathObjects['neuron_' + str(neuron) + '_sorts_roc_op'] = pname6
            if choicesfile:
                # Only the pre-selected configuration is processed.
                break
        # Loop over neurons
        # External
        pathBenchmarks[benchmarkName] = pathObjects
        perfBenchmarks[benchmarkName] = perfObjects
        # Release memory
        # NOTE(review): deletes while iterating .keys(); safe on Python 2
        # (keys() returns a list) — confirm before porting to Python 3.
        for xname in plotObjects.keys():
            del plotObjects[xname]
        gc.collect()
        #if debug: break
    # Loop over benchmark
    # Eta bin
    # etabinidx = self._infoObjs[0].etabinidx()
    # Et bin
    # Bin boundary strings for the LaTeX tables; etbin/etabin here are the
    # values left over from the last benchmark iterated above.
    binBounds = dict()
    if len(etbin) > 0:
        binBounds['etbinstr'] = r'$%d < E_{T} \text{[Gev]}<%d$' % etbin
    else:
        binBounds['etbinstr'] = r'\text{etBin[%d]}' % etbinidx
    if len(etabin) > 0:
        binBounds['etabinstr'] = r'$%.2f<\eta<%.2f$' % etabin
    else:
        binBounds['etabinstr'] = r'\text{etaBin[%d]}' % etabinidx
    perfBounds = dict()
    perfBounds['bounds'] = binBounds
    perfBounds['perf'] = perfBenchmarks
    fname = basepath + '/' + 'perfBounds'
    save(perfBounds, fname)
    # Start beamer presentation
    if doBeamer:
        from BeamerTemplates import BeamerReport, BeamerTables, BeamerFigure, BeamerBlocks
        # Eta bin
        etabin = self._infoObjs[0].etabin()
        etabinidx = self._infoObjs[0].etabinidx()
        # Et bin
        etbin = self._infoObjs[0].etbin()
        etbinidx = self._infoObjs[0].etbinidx()
        # Create the beamer manager
        reportname = ('%s_et%d_eta%d') % (output, etbinidx, etabinidx)
        beamer = BeamerReport(basepath + '/' + reportname,
                              title=('Tuning Report (et=%d, eta=%d)') % (etbinidx, etabinidx))
        neuronBounds = self._infoObjs[0].neuronBounds()
        for neuron in neuronBounds:
            # Make the tables for crossvalidation
            ptableCross = BeamerTables(
                frametitle=[
                    'Neuron ' + str(neuron) + ': Cross Validation Performance',
                    'Neuron ' + str(neuron) + ": Operation Best Network"
                ],
                caption=[
                    'Efficiencies from each benchmark.',
                    'Efficiencies for the best operation network'
                ])
            block = BeamerBlocks('Neuron ' + str(neuron) + ' Analysis', [
                ('All sorts (validation)',
                 'All sorts evolution are ploted, each sort represents the best init;'),
                ('All sorts (operation)',
                 'All sorts evolution only for operation set;'),
                ('Best operation',
                 'Detailed analysis from the best sort discriminator.'),
                ('Tables', 'Cross validation performance')
            ])
            if not shortSlides:
                block.tolatex(beamer.file())
            for info in self._infoObjs:
                # If we produce a short presentation, we do not draw all plots
                if not shortSlides:
                    bname = info.name().replace('OperationPoint_', '')
                    fig1 = BeamerFigure(
                        pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sorts_val'].replace(basepath + '/', ''),
                        0.7,
                        frametitle=bname + ', Neuron ' + str(neuron) + ': All sorts (validation)')
                    fig2 = BeamerFigure(
                        pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sorts_roc_tst'].replace(basepath + '/', ''),
                        0.8,
                        frametitle=bname + ', Neuron ' + str(neuron) + ': All ROC sorts (validation)')
                    fig3 = BeamerFigure(
                        pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sort_op'].replace(basepath + '/', ''),
                        0.7,
                        frametitle=bname + ', Neuron ' + str(neuron) + ': All sorts (operation)')
                    fig4 = BeamerFigure(
                        pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_sorts_roc_op'].replace(basepath + '/', ''),
                        0.8,
                        frametitle=bname + ', Neuron ' + str(neuron) + ': All ROC sorts (operation)')
                    fig5 = BeamerFigure(
                        pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_best_op'].replace(basepath + '/', ''),
                        0.7,
                        frametitle=bname + ', Neuron ' + str(neuron) + ': Best Network')
                    fig6 = BeamerFigure(
                        pathBenchmarks[info.name()]['neuron_' + str(neuron) + '_best_op_output'].replace(basepath + '/', ''),
                        0.8,
                        frametitle=bname + ', Neuron ' + str(neuron) + ': Best Network output')
                    # Draw figures into the tex file
                    fig1.tolatex(beamer.file())
                    fig2.tolatex(beamer.file())
                    fig3.tolatex(beamer.file())
                    fig4.tolatex(beamer.file())
                    fig5.tolatex(beamer.file())
                    fig6.tolatex(beamer.file())
                # Concatenate performance table, each line will be a benchmark
                # e.g: det, sp and fa
                ptableCross.add(perfBenchmarks[info.name()]['config_' + str(neuron).zfill(3)])
                #if debug: break
            ptableCross.tolatex(beamer.file())  # internal switch is false to true: test
            ptableCross.tolatex(beamer.file())  # internal switch is true to false: operation
            if debug:
                break
        beamer.close()
    self._logger.info('Done! ')
#TODO: Do something elegant here if hasattr(args, 'outputDir'): _outputDir = args.outputDir else: _outputDir = "" if clusterManagerConf() is ClusterManager.Panda: memoryVal = args.get_job_submission_option('memory') # Prepare to run from itertools import product startBin = True for etBin, etaBin in progressbar( product(args.et_bins(), args.eta_bins()), count=len(list(args.et_bins())) * len(list(args.eta_bins())) if args.et_bins() else 1, logger=mainLogger, ): if clusterManagerConf() is ClusterManager.Panda: # When running multiple bins, dump workspace to a file and re-use it: if etBin is not None or etaBin is not None: if startBin: if args.get_job_submission_option( 'outTarBall' ) is None and not args.get_job_submission_option('inTarBall'): args.set_job_submission_option('outTarBall', 'workspace.tar') startBin = False else: if args.get_job_submission_option('outTarBall') is not None: # Swap outtar with intar
def __call__( self, fList, ringerOperation, **kw): """ Read ntuple and return patterns and efficiencies. Arguments: - fList: The file path or file list path. It can be an argument list of two types: o List: each element is a string path to the file; o Comma separated string: each path is separated via a comma o Folders: Expand folders recursively adding also files within them to analysis - ringerOperation: Set Operation type. It can be both a string or the RingerOperation Optional arguments: - filterType [None]: whether to filter. Use FilterType enumeration - reference [Truth]: set reference for targets. Use Reference enumeration - treePath [Set using operation]: set tree name on file, this may be set to use different sources then the default. Default for: o Offline: Offline/Egamma/Ntuple/electron o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger - offEtCut [None]: Set Offline cluster energy cut value - nClusters [None]: Read up to nClusters. Use None to run for all clusters. - getRatesOnly [False]: Read up to nClusters. Use None to run for all clusters. - etBins [None]: E_T bins (GeV) where the data should be segmented - etaBins [None]: eta bins where the data should be segmented - ringConfig [100]: A list containing the number of rings available in the data for each eta bin. - crossVal [None]: Whether to measure benchmark efficiency splitting it by the crossVal-validation datasets - extractDet [None]: Which detector to export (use Detector enumeration). Defaults are: o L2Calo: Calorimetry o L2: Tracking o Offline: Calorimetry o Others: CaloAndTrack - standardCaloVariables [False]: Whether to extract standard track variables. - useTRT [False]: Whether to export TRT information when dumping track variables. 
- supportTriggers [True]: Whether reading data comes from support triggers """ # Offline information branches: __offlineBranches = ['el_et', 'el_eta', #'el_loose', #'el_medium', #'el_tight', 'el_lhLoose', 'el_lhMedium', 'el_lhTight', 'mc_hasMC', 'mc_isElectron', 'mc_hasZMother', 'el_nPileupPrimaryVtx', ] # Online information branches __onlineBranches = [] __l2stdCaloBranches = ['trig_L2_calo_et', 'trig_L2_calo_eta', 'trig_L2_calo_phi', 'trig_L2_calo_e237', # rEta 'trig_L2_calo_e277', # rEta 'trig_L2_calo_fracs1', # F1: fraction sample 1 'trig_L2_calo_weta2', # weta2 'trig_L2_calo_ehad1', # energy on hadronic sample 1 'trig_L2_calo_emaxs1', # eratio 'trig_L2_calo_e2tsts1', # eratio 'trig_L2_calo_wstot',] # wstot __l2trackBranches = [ # Do not add non patter variables on this branch list #'trig_L2_el_pt', #'trig_L2_el_eta', #'trig_L2_el_phi', #'trig_L2_el_caloEta', #'trig_L2_el_charge', #'trig_L2_el_nTRTHits', #'trig_L2_el_nTRTHiThresholdHits', 'trig_L2_el_etOverPt', 'trig_L2_el_trkClusDeta', 'trig_L2_el_trkClusDphi',] # Retrieve information from keyword arguments filterType = retrieve_kw(kw, 'filterType', FilterType.DoNotFilter ) reference = retrieve_kw(kw, 'reference', Reference.Truth ) l1EmClusCut = retrieve_kw(kw, 'l1EmClusCut', None ) l2EtCut = retrieve_kw(kw, 'l2EtCut', None ) efEtCut = retrieve_kw(kw, 'efEtCut', None ) offEtCut = retrieve_kw(kw, 'offEtCut', None ) treePath = retrieve_kw(kw, 'treePath', None ) nClusters = retrieve_kw(kw, 'nClusters', None ) getRates = retrieve_kw(kw, 'getRates', True ) getRatesOnly = retrieve_kw(kw, 'getRatesOnly', False ) etBins = retrieve_kw(kw, 'etBins', None ) etaBins = retrieve_kw(kw, 'etaBins', None ) crossVal = retrieve_kw(kw, 'crossVal', None ) ringConfig = retrieve_kw(kw, 'ringConfig', 100 ) extractDet = retrieve_kw(kw, 'extractDet', None ) standardCaloVariables = retrieve_kw(kw, 'standardCaloVariables', False ) useTRT = retrieve_kw(kw, 'useTRT', False ) supportTriggers = retrieve_kw(kw, 'supportTriggers', True ) 
monitoring = retrieve_kw(kw, 'monitoring', None ) pileupRef = retrieve_kw(kw, 'pileupRef', NotSet ) import ROOT, pkgutil #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C"); #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C') if not( bool( pkgutil.find_loader( 'libTuningTools' ) ) and ROOT.gSystem.Load('libTuningTools') >= 0 ) and \ not( bool( pkgutil.find_loader( 'libTuningToolsLib' ) ) and ROOT.gSystem.Load('libTuningToolsLib') >= 0 ): #ROOT.gSystem.Load('libTuningToolsPythonLib') < 0: self._fatal("Could not load TuningTools library", ImportError) if 'level' in kw: self.level = kw.pop('level') # and delete it to avoid mistakes: checkForUnusedVars( kw, self._warning ) del kw ### Parse arguments # Mutual exclusive arguments: if not getRates and getRatesOnly: self._logger.error("Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True.") getRates = True # Also parse operation, check if its type is string and if we can # transform it to the known operation enum: fList = csvStr2List ( fList ) fList = expandFolders( fList ) ringerOperation = RingerOperation.retrieve(ringerOperation) reference = Reference.retrieve(reference) if isinstance(l1EmClusCut, str): l1EmClusCut = float(l1EmClusCut) if l1EmClusCut: l1EmClusCut = 1000.*l1EmClusCut # Put energy in MeV __onlineBranches.append( 'trig_L1_emClus' ) if l2EtCut: l2EtCut = 1000.*l2EtCut # Put energy in MeV __onlineBranches.append( 'trig_L2_calo_et' ) if efEtCut: efEtCut = 1000.*efEtCut # Put energy in MeV __onlineBranches.append( 'trig_EF_calo_et' ) if offEtCut: offEtCut = 1000.*offEtCut # Put energy in MeV __offlineBranches.append( 'el_et' ) if not supportTriggers: __onlineBranches.append( 'trig_L1_accept' ) # Check if treePath is None and try to set it automatically if treePath is None: treePath = 'Offline/Egamma/Ntuple/electron' if ringerOperation < 0 else \ 'Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH' # Check whether using bins useBins=False; useEtBins=False; 
useEtaBins=False nEtaBins = 1; nEtBins = 1 # Set the detector which we should extract the information: if extractDet is None: if ringerOperation < 0: extractDet = Detector.Calorimetry elif ringerOperation is RingerOperation.L2Calo: extractDet = Detector.Calorimetry elif ringerOperation is RingerOperation.L2: extractDet = Detector.Tracking else: extractDet = Detector.CaloAndTrack else: extractDet = Detector.retrieve( extractDet ) if etaBins is None: etaBins = npCurrent.fp_array([]) if type(etaBins) is list: etaBins=npCurrent.fp_array(etaBins) if etBins is None: etBins = npCurrent.fp_array([]) if type(etBins) is list: etBins=npCurrent.fp_array(etBins) if etBins.size: etBins = etBins * 1000. # Put energy in MeV nEtBins = len(etBins)-1 if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max: self._fatal(('Number of et bins (%d) is larger or equal than maximum ' 'integer precision can hold (%d). Increase ' 'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtBins, np.iinfo(npCurrent.scounter_dtype).max) # Flag that we are separating data through bins useBins=True useEtBins=True self._debug('E_T bins enabled.') if not type(ringConfig) is list and not type(ringConfig) is np.ndarray: ringConfig = [ringConfig] * (len(etaBins) - 1) if etaBins.size else 1 if type(ringConfig) is list: ringConfig=npCurrent.int_array(ringConfig) if not len(ringConfig): self._fatal('Rings size must be specified.'); if etaBins.size: nEtaBins = len(etaBins)-1 if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max: self._fatal(('Number of eta bins (%d) is larger or equal than maximum ' 'integer precision can hold (%d). 
Increase ' 'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtaBins, np.iinfo(npCurrent.scounter_dtype).max) if len(ringConfig) != nEtaBins: self._fatal(('The number of rings configurations (%r) must be equal than ' 'eta bins (%r) region config'),ringConfig, etaBins) useBins=True useEtaBins=True self._debug('eta bins enabled.') else: self._debug('eta/et bins disabled.') ### Prepare to loop: # Open root file t = ROOT.TChain(treePath) for inputFile in progressbar(fList, len(fList), logger = self._logger, prefix = "Creating collection tree "): # Check if file exists f = ROOT.TFile.Open(inputFile, 'read') if not f or f.IsZombie(): self._warning('Couldn''t open file: %s', inputFile) continue # Inform user whether TTree exists, and which options are available: self._debug("Adding file: %s", inputFile) obj = f.Get(treePath) if not obj: self._warning("Couldn't retrieve TTree (%s)!", treePath) self._info("File available info:") f.ReadAll() f.ReadKeys() f.ls() continue elif not isinstance(obj, ROOT.TTree): self._fatal("%s is not an instance of TTree!", treePath, ValueError) t.Add( inputFile ) # Turn all branches off. 
t.SetBranchStatus("*", False) # RingerPhysVal hold the address of required branches event = ROOT.RingerPhysVal() # Add offline branches, these are always needed cPos = 0 for var in __offlineBranches: self.__setBranchAddress(t,var,event) # Add online branches if using Trigger if ringerOperation > 0: for var in __onlineBranches: self.__setBranchAddress(t,var,event) ## Allocating memory for the number of entries entries = t.GetEntries() nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \ else nClusters ## Retrieve the dependent operation variables: if useEtBins: etBranch = 'el_et' if ringerOperation < 0 else 'trig_L2_calo_et' self.__setBranchAddress(t,etBranch,event) self._debug("Added branch: %s", etBranch) if not getRatesOnly: npEt = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs)) self._debug("Allocated npEt with size %r", npEt.shape) if useEtaBins: etaBranch = "el_eta" if ringerOperation < 0 else "trig_L2_calo_eta" self.__setBranchAddress(t,etaBranch,event) self._debug("Added branch: %s", etaBranch) if not getRatesOnly: npEta = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs)) self._debug("Allocated npEta with size %r", npEta.shape) # The base information holder, such as et, eta and pile-up if pileupRef is NotSet: if ringerOperation > 0: pileupRef = PileupReference.avgmu else: pileupRef = PileupReference.nvtx pileupRef = PileupReference.retrieve( pileupRef ) self._info("Using '%s' as pile-up reference.", PileupReference.tostring( pileupRef ) ) if pileupRef is PileupReference.nvtx: pileupBranch = 'el_nPileupPrimaryVtx' pileupDataType = np.uint16 elif pileupRef is PileupReference.avgmu: pileupBranch = 'avgmu' pileupDataType = np.float32 else: raise NotImplementedError("Pile-up reference %r is not implemented." 
% pileupRef) baseInfoBranch = BaseInfo((etBranch, etaBranch, pileupBranch, 'el_phi' if ringerOperation < 0 else 'trig_L2_el_phi',), (npCurrent.fp_dtype, npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType) ) baseInfo = [None, ] * baseInfoBranch.nInfo # Make sure all baseInfoBranch information is available: for idx in baseInfoBranch: self.__setBranchAddress(t,baseInfoBranch.retrieveBranch(idx),event) # Allocate numpy to hold as many entries as possible: if not getRatesOnly: # Retrieve the rings information depending on ringer operation ringerBranch = "el_ringsE" if ringerOperation < 0 else \ "trig_L2_calo_rings" self.__setBranchAddress(t,ringerBranch,event) if ringerOperation > 0: if ringerOperation is RingerOperation.L2: for var in __l2trackBranches: self.__setBranchAddress(t,var,event) if standardCaloVariables: if ringerOperation in (RingerOperation.L2, RingerOperation.L2Calo,): for var in __l2stdCaloBranches: self.__setBranchAddress(t, var, event) else: self._warning("Unknown standard calorimeters for Operation:%s. 
Setting operation back to use rings variables.", RingerOperation.tostring(ringerOperation)) t.GetEntry(0) npat = 0 if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack, Detector.All): if standardCaloVariables: npat+= 6 else: npat += ringConfig.max() if extractDet in (Detector.Tracking, Detector.CaloAndTrack, Detector.All): if ringerOperation is RingerOperation.L2: if useTRT: self._info("Using TRT information!") npat += 2 __l2trackBranches.append('trig_L2_el_nTRTHits') __l2trackBranches.append('trig_L2_el_nTRTHiThresholdHits') npat += 3 for var in __l2trackBranches: self.__setBranchAddress(t,var,event) self.__setBranchAddress(t,"trig_L2_el_pt",event) elif ringerOperation < 0: # Offline self._warning("Still need to implement tracking for the ringer offline.") npPatterns = npCurrent.fp_zeros( shape=npCurrent.shape(npat=npat, #getattr(event, ringerBranch).size() nobs=nobs) ) self._debug("Allocated npPatterns with size %r", npPatterns.shape) # Add E_T, eta and luminosity information npBaseInfo = [npCurrent.zeros( shape=npCurrent.shape(npat=1, nobs=nobs ), dtype=baseInfoBranch.dtype(idx) ) for idx in baseInfoBranch] else: npPatterns = npCurrent.fp_array([]) npBaseInfo = [deepcopy(npCurrent.fp_array([])) for _ in baseInfoBranch] ## Allocate the branch efficiency collectors: if getRates: if ringerOperation < 0: benchmarkDict = OrderedDict( [( RingerOperation.Offline_CutBased_Loose , 'el_loose' ), ( RingerOperation.Offline_CutBased_Medium , 'el_medium' ), ( RingerOperation.Offline_CutBased_Tight , 'el_tight' ), ( RingerOperation.Offline_LH_Loose , 'el_lhLoose' ), ( RingerOperation.Offline_LH_Medium , 'el_lhMedium' ), ( RingerOperation.Offline_LH_Tight , 'el_lhTight' ), ]) else: benchmarkDict = OrderedDict( [( RingerOperation.L2Calo , 'trig_L2_calo_accept' ), ( RingerOperation.L2 , 'trig_L2_el_accept' ), ( RingerOperation.EFCalo , 'trig_EF_calo_accept' ), ( RingerOperation.HLT , 'trig_EF_el_accept' ), ]) from TuningTools.CreateData import BranchEffCollector, 
BranchCrossEffCollector branchEffCollectors = OrderedDict() branchCrossEffCollectors = OrderedDict() for key, val in benchmarkDict.iteritems(): branchEffCollectors[key] = list() branchCrossEffCollectors[key] = list() # Add efficincy branch: if getRates or getRatesOnly: self.__setBranchAddress(t,val,event) for etBin in range(nEtBins): if useBins: branchEffCollectors[key].append(list()) branchCrossEffCollectors[key].append(list()) for etaBin in range(nEtaBins): etBinArg = etBin if useBins else -1 etaBinArg = etaBin if useBins else -1 argList = [ RingerOperation.tostring(key), val, etBinArg, etaBinArg ] branchEffCollectors[key][etBin].append(BranchEffCollector( *argList ) ) if crossVal: branchCrossEffCollectors[key][etBin].append(BranchCrossEffCollector( entries, crossVal, *argList ) ) # etBin # etaBin # benchmark dict if self._logger.isEnabledFor( LoggingLevel.DEBUG ): self._debug( 'Retrieved following branch efficiency collectors: %r', [collector[0].printName for collector in traverse(branchEffCollectors.values())]) # end of (getRates) etaBin = 0; etBin = 0 step = int(entries/100) if int(entries/100) > 0 else 1 ## Start loop! 
self._info("There is available a total of %d entries.", entries) for entry in progressbar(range(entries), entries, step = step, logger = self._logger, prefix = "Looping over entries "): #self._verbose('Processing eventNumber: %d/%d', entry, entries) t.GetEntry(entry) # Check if it is needed to remove energy regions (this means that if not # within this range, it will be ignored as well for efficiency measuremnet) if event.el_et < offEtCut: self._verbose("Ignoring entry due to offline E_T cut.") continue # Add et distribution for all events if not monitoring is None: # Book all distribtions before the event selection self.__fillHistograms(monitoring,filterType,event,False) if ringerOperation > 0: # Remove events which didn't pass L1_calo if not supportTriggers and not event.trig_L1_accept: #self._verbose("Ignoring entry due to L1Calo cut (trig_L1_accept = %r).", event.trig_L1_accept) continue if event.trig_L1_emClus < l1EmClusCut: #self._verbose("Ignoring entry due to L1Calo E_T cut (%d < %r).", event.trig_L1_emClus, l1EmClusCut) continue if event.trig_L2_calo_et < l2EtCut: #self._verbose("Ignoring entry due to L2Calo E_T cut.") continue if efEtCut is not None and event.trig_L2_calo_accept : # EF calo is a container, search for electrons objects with et > cut trig_EF_calo_et_list = stdvector_to_list(event.trig_EF_calo_et) found=False for v in trig_EF_calo_et_list: if v < efEtCut: found=True if found: #self._verbose("Ignoring entry due to EFCalo E_T cut.") continue # Set discriminator target: target = Target.Unknown if reference is Reference.Truth: if event.mc_isElectron and event.mc_hasZMother: target = Target.Signal elif not (event.mc_isElectron and (event.mc_hasZMother or event.mc_hasWMother) ): target = Target.Background elif reference is Reference.Off_Likelihood: if event.el_lhTight: target = Target.Signal elif not event.el_lhLoose: target = Target.Background elif reference is Reference.AcceptAll: target = Target.Signal if filterType is FilterType.Signal else 
Target.Background else: if event.el_tight: target = Target.Signal elif not event.el_loose: target = Target.Background # Run filter if it is defined if filterType and \ ( (filterType is FilterType.Signal and target != Target.Signal) or \ (filterType is FilterType.Background and target != Target.Background) or \ (target == Target.Unknown) ): #self._verbose("Ignoring entry due to filter cut.") continue # Add et distribution for all events if not monitoring is None: # Book all distributions after the event selection self.__fillHistograms(monitoring,filterType,event,True) # Retrieve base information: for idx in baseInfoBranch: lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx)) baseInfo[idx] = lInfo if not getRatesOnly: npBaseInfo[idx][cPos] = lInfo # Retrieve dependent operation region if useEtBins: etBin = self.__retrieveBinIdx( etBins, baseInfo[0] ) if useEtaBins: etaBin = self.__retrieveBinIdx( etaBins, np.fabs( baseInfo[1]) ) # Check if bin is within range (when not using bins, this will always be true): if (etBin < nEtBins and etaBin < nEtaBins): # Retrieve patterns: if not getRatesOnly: if useEtBins: npEt[cPos] = etBin if useEtaBins: npEta[cPos] = etaBin ## Retrieve calorimeter information: cPat = 0 caloAvailable = True if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack, Detector.All): if standardCaloVariables: patterns = [] if ringerOperation is RingerOperation.L2Calo: from math import cosh cosh_eta = cosh( event.trig_L2_calo_eta ) # second layer ratio between 3x7 7x7 rEta = event.trig_L2_calo_e237 / event.trig_L2_calo_e277 base = event.trig_L2_calo_emaxs1 + event.trig_L2_calo_e2tsts1 # Ratio between first and second highest energy cells eRatio = ( event.trig_L2_calo_emaxs1 - event.trig_L2_calo_e2tsts1 ) / base if base > 0 else 0 # ratio of energy in the first layer (hadronic particles should leave low energy) F1 = event.trig_L2_calo_fracs1 / ( event.trig_L2_calo_et * cosh_eta ) # weta2 is calculated over the middle layer using 3 x 5 weta2 = 
event.trig_L2_calo_weta2 # wstot is calculated over the first layer using (typically) 20 strips wstot = event.trig_L2_calo_wstot # ratio between EM cluster and first hadronic layers: Rhad1 = ( event.trig_L2_calo_ehad1 / cosh_eta ) / event.trig_L2_calo_et # allocate patterns: patterns = [rEta, eRatio, F1, weta2, wstot, Rhad1] for pat in patterns: npPatterns[npCurrent.access( pidx=cPat, oidx=cPos) ] = pat cPat += 1 # end of ringerOperation else: # Remove events without rings if getattr(event,ringerBranch).empty(): caloAvailable = False # Retrieve rings: if caloAvailable: try: patterns = stdvector_to_list( getattr(event,ringerBranch) ) lPat = len(patterns) if lPat == ringConfig[etaBin]: npPatterns[npCurrent.access(pidx=slice(cPat,ringConfig[etaBin]),oidx=cPos)] = patterns else: oldEtaBin = etaBin if etaBin > 0 and ringConfig[etaBin - 1] == lPat: etaBin -= 1 elif etaBin + 1 < len(ringConfig) and ringConfig[etaBin + 1] == lPat: etaBin += 1 npPatterns[npCurrent.access(pidx=slice(cPat, ringConfig[etaBin]),oidx=cPos)] = patterns self._warning(("Recovered event which should be within eta bin (%d: %r) " "but was found to be within eta bin (%d: %r). " "Its read eta value was of %f."), oldEtaBin, etaBins[oldEtaBin:oldEtaBin+2], etaBin, etaBins[etaBin:etaBin+2], np.fabs( getattr(event,etaBranch))) except ValueError: self._logger.error(("Patterns size (%d) do not match expected " "value (%d). This event eta value is: %f, and ringConfig is %r."), lPat, ringConfig[etaBin], np.fabs( getattr(event,etaBranch)), ringConfig ) continue else: if extractDet is Detector.Calorimetry: # Also display warning when extracting only calorimetry! 
self._warning("Rings not available") continue self._warning("Rings not available") continue cPat += ringConfig.max() # which calo variables # end of (extractDet needed calorimeter) # And track information: if extractDet in (Detector.Tracking, Detector.CaloAndTrack, Detector.All): if caloAvailable or extractDet is Detector.Tracking: if ringerOperation is RingerOperation.L2: # Retrieve nearest deta/dphi only, so we need to find each one is the nearest: if event.trig_L2_el_trkClusDeta.size(): clusDeta = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDeta ) ) clusDphi = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDphi ) ) bestTrackPos = int( np.argmin( clusDeta**2 + clusDphi**2 ) ) for var in __l2trackBranches: npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = getattr(event, var)[bestTrackPos] cPat += 1 else: #self._verbose("Ignoring entry due to track information not available.") continue #for var in __l2trackBranches: # npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = np.nan # cPat += 1 elif ringerOperation < 0: # Offline pass # caloAvailable or only tracking # end of (extractDet needs tracking) # end of (getRatesOnly) ## Retrieve rates information: if getRates: for branch in branchEffCollectors.itervalues(): if not useBins: branch.update(event) else: branch[etBin][etaBin].update(event) if crossVal: for branchCross in branchCrossEffCollectors.itervalues(): if not useBins: branchCross.update(event) else: branchCross[etBin][etaBin].update(event) # end of (getRates) # We only increment if this cluster will be computed cPos += 1 # end of (et/eta bins) # Limit the number of entries to nClusters if desired and possible: if not nClusters is None and cPos >= nClusters: break # for end ## Treat the rings information if not getRatesOnly: ## Remove not filled reserved memory space: if npPatterns.shape[npCurrent.odim] > cPos: npPatterns = np.delete( npPatterns, slice(cPos,None), axis = npCurrent.odim) ## Segment data over bins regions: 
# Also remove not filled reserved memory space: if useEtBins: npEt = npCurrent.delete( npEt, slice(cPos,None)) if useEtaBins: npEta = npCurrent.delete( npEta, slice(cPos,None)) # Treat npObject = self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins, nEtaBins, standardCaloVariables, ringConfig, npPatterns, ) data = [self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins, nEtaBins, standardCaloVariables, ringConfig, npData) for npData in npBaseInfo] npBaseInfo = npCurrent.array( data, dtype=np.object ) else: npObject = npCurrent.array([], dtype=npCurrent.dtype) # not getRatesOnly if getRates: if crossVal: for etBin in range(nEtBins): for etaBin in range(nEtaBins): for branchCross in branchCrossEffCollectors.itervalues(): if not useBins: branchCross.finished() else: branchCross[etBin][etaBin].finished() # Print efficiency for each one for the efficiency branches analysed: for etBin in range(nEtBins) if useBins else range(1): for etaBin in range(nEtaBins) if useBins else range(1): for branch in branchEffCollectors.itervalues(): lBranch = branch if not useBins else branch[etBin][etaBin] self._info('%s',lBranch) if crossVal: for branchCross in branchCrossEffCollectors.itervalues(): lBranchCross = branchCross if not useBins else branchCross[etBin][etaBin] lBranchCross.dump(self._debug, printSort = True, sortFcn = self._verbose) # for branch # for eta # for et # end of (getRates) outputs = [] #if not getRatesOnly: outputs.extend((npObject, npBaseInfo)) #if getRates: outputs.extend((branchEffCollectors, branchCrossEffCollectors)) #outputs = tuple(outputs) return outputs
def expandFolders(pathList, filters=None, logger=None, level=None):
  """
    Expand all folders to the contained files using the filters on pathList.

    Input arguments:
      -> pathList: a list containing paths to files and folders;
      -> filters: return a list for each filter with the files contained on
         the list matching the filter glob (default: a single '*' filter);
      -> logger: whether to print progress using logger;
      -> level: logging level to print messages with logger.

    Returns: when more than one filter is given, a list of file lists (one
    per filter); with a single filter, the flat file list itself.

    Raises ValueError when an expanded path does not exist.

    WARNING: This function is extremely slow and will severely decrease
    performance if used to expand base paths with several folders in it.
  """
  if not isinstance(pathList, (list, tuple)):
    pathList = [pathList]
  from glob import glob
  if filters is None:
    filters = ['*']
  if not isinstance(filters, (list, tuple)):
    filters = [filters]
  # One result bucket per filter:
  retList = [[] for _ in range(len(filters))]
  from RingerCore import progressbar, traverse
  # Expand any glob patterns present in the input paths before looping:
  pathList = list(traverse([glob(path) if '*' in path else path
                            for path in traverse(pathList, simple_ret=True)],
                           simple_ret=True))
  for path in progressbar(pathList, len(pathList), 'Expanding folders: ', 60, 50,
                          True if logger is not None else False,
                          logger=logger, level=level):
    path = expandPath(path)
    if not os.path.exists(path):
      raise ValueError("Cannot reach path '%s'" % path)
    if os.path.isdir(path):
      # Collect the plain files directly inside this folder, per filter.
      # NOTE: a list comprehension is used instead of filter(lambda ...) so
      # the truthiness test below also works on Python 3 (where filter()
      # returns an always-truthy iterator).
      for idx, filt in enumerate(filters):
        cList = [f for f in glob(os.path.join(path, filt))
                 if not os.path.isdir(f)]
        if cList:
          retList[idx].extend(cList)
      # Recurse into every sub-folder:
      folders = [os.path.join(path, f) for f in os.listdir(path)
                 if os.path.isdir(os.path.join(path, f))]
      if folders:
        recList = expandFolders(folders, filters)
        if len(filters) == 1:
          # Single-filter recursion returns the flat list; re-wrap it so the
          # merge loop below is uniform.
          recList = [recList]
        # FIX: merge each recursed filter-list into its own bucket. The
        # previous code reused the stale loop variable idx from the filter
        # loop above, dumping every filter's recursed files into the last
        # filter's bucket.
        for fIdx, l in enumerate(recList):
          retList[fIdx].extend(l)
    else:
      # Plain file: keep it for every filter it matches.
      for idx, filt in enumerate(filters):
        if path in glob(os.path.join(os.path.dirname(path), filt)):
          retList[idx].append(path)
  if len(filters) == 1:
    # Backward-compatible flattening for the common single-filter call.
    retList = retList[0]
  return retList