def auto(self):
    """
    Automatically choose the dataframe when it was not configured explicitly.

    The command line is inspected first through ``dataframeParser``; when the
    ``data_framework`` option is present it is applied and removed from
    ``sys.argv``. If the dataframe is still not configured, the sample stored
    in ``self._sample`` is inspected: a dict is classified by its first
    iterated key, while a list, a folder or a single file path is handed to
    ``self._checkFile`` until the dataframe becomes configured.

    ``self._fatal`` is called when no sample is available for
    auto-configuration or when the sample did not allow a decision.
    """
    self._debug(
        "Using automatic configuration for dataframe specification.")

    # Check whether we can retrieve from the parser.
    import sys
    from TuningTools.parsers.BaseModuleParser import dataframeParser
    try:
        parsed, remaining = dataframeParser.parse_known_args()
        if parsed.data_framework not in (None, NotSet):
            self.dataframe = parsed.data_framework
            # Consume option
            sys.argv = sys.argv[:1] + remaining
    except (ArgumentError, ValueError) as err:
        self._debug("Ignored argument parsing error:\n %s", err)

    from RingerCore import csvStr2List, expandFolders

    def probe(candidates):
        # Check the files one by one and stop at the first conclusive one.
        for candidate in candidates:
            self._checkFile(candidate)
            if self.configured():
                break

    if not self.configured():
        if not self.can_autoconfigure():
            self._fatal(
                "Cannot auto-configure which dataframe to use because no sample was specified via the auto_retrieve_sample() method."
            )
        elif isinstance(self._sample, dict):
            # Only the first iterated key is looked at.
            for key in self._sample:
                self.dataframe = (DataframeEnum.SkimmedNtuple
                                  if 'elCand2_' in key
                                  else DataframeEnum.PhysVal)
                break
        elif self._sample and isinstance(self._sample, list):
            if not isinstance(self._sample[0], basestring):
                self._fatal(
                    "Cannot autoconfigure dataframe using the following list: %r",
                    self._sample)
            probe(expandFolders(csvStr2List(self._sample[0])))
        elif isinstance(self._sample, basestring):
            if os.path.isdir(self._sample):
                probe(expandFolders(self._sample))
            else:
                self._checkFile(self._sample)

    if not self.configured():
        self._fatal("Couldn't autoconfigure using source: %r", self._sample)
def __call__(self, summaryInfoListCol, filenameWeightsList, filenameThresList,
             refBenchCol=None, configCol=None, muBins=None):
    """
    Export the tuned models of every tuning to weights/thresholds files.

    Arguments:
    - summaryInfoListCol: list of rows (one per tuning); each row is a list
      of paths, one per pile-up bin. Every path is expanded with
      expandFolders and only files ending with '.pic.gz' are used.
    - filenameWeightsList: output name for the weights, indexed per row.
    - filenameThresList: output name for the thresholds, indexed per row.
    - refBenchCol [None]: reference benchmark. A falsy value or a single
      string is replicated to the full rows x columns grid; otherwise a
      nested list with that shape is expected.
    - configCol [None]: configuration. A falsy value becomes a grid of empty
      lists and a single int a grid of one-element lists; otherwise a nested
      list with the grid shape is expected.
    - muBins [None]: pile-up bin edges, defaults to [-999, 999]. Must hold
      one more element than the number of paths in a row.

    Shape mismatches are reported through self._logger.fatal.
    """
    # The default is created per call instead of sharing one mutable list
    # in the function signature.
    if muBins is None:
        muBins = [-999, 999]

    nRows = len(summaryInfoListCol)
    nCols = len(summaryInfoListCol[0])

    # treat ref benchmark collection
    if not refBenchCol or isinstance(refBenchCol, str):
        refBenchCol = [[refBenchCol for _ in range(nCols)]
                       for _ in range(nRows)]
    # Either dimension being wrong makes the collection unusable, hence `or`.
    if len(refBenchCol) != nRows or len(refBenchCol[0]) != nCols:
        self._logger.fatal('RefBenchCol is not compatible with the definitions. See the help function')

    # treat config collections
    # Independent inner lists are built so that a consumer mutating one cell
    # cannot affect the others.
    if not configCol:
        configCol = [[[] for _ in range(nCols)] for _ in range(nRows)]
    elif isinstance(configCol, int):
        configCol = [[[configCol] for _ in range(nCols)]
                     for _ in range(nRows)]
    if len(configCol) != nRows or len(configCol[0]) != nCols:
        self._logger.fatal('ConfigCol is not compatible with the definitions. See the help function')

    # check if the numbers of paths in the line is compatible with the pileup grid
    if nCols != (len(muBins) - 1):
        self._logger.fatal('The numbers of paths in the line is not compatible with the pileup bins')

    # Loop over all tuning pids
    for idx, summaryInfoList in enumerate(summaryInfoListCol):
        discrList = []
        for jdx, path in enumerate(summaryInfoList):
            crossValGrid = [f for f in expandFolders(path)
                            if f.endswith('.pic.gz')]
            # get all models
            discrList.extend(
                self.getModels(crossValGrid,
                               refBenchCol=refBenchCol[idx][jdx],
                               configCol=configCol[idx][jdx],
                               muBin=(muBins[jdx], muBins[jdx + 1])))
        # export the configuration to root/py format
        self._exportModel.create_weights(discrList, filenameWeightsList[idx])
        self._exportModel.create_thresholds(discrList, filenameThresList[idx])
def __init__(self, **kw):
    """
    Set up the instance from keyword options.

    Keywords read (with their defaults): inputFiles (NotSet), outputFile
    ("histos.root"), treePath (NotSet), dataframe
    (DataframeEnum.SkimmedNtuple) and nov (-1). The input files go through
    csvStr2List and expandFolders. The TuningTools library is loaded through
    ROOT, and a random identifier below 100000 is stored in ``self._id``.
    """
    Logger.__init__(self, kw)

    # Retrieve all information needed
    inputFiles = retrieve_kw(kw, 'inputFiles', NotSet)
    self._ofile = retrieve_kw(kw, 'outputFile', "histos.root")
    self._treePath = retrieve_kw(kw, 'treePath', NotSet)
    self._dataframe = retrieve_kw(kw, 'dataframe',
                                  DataframeEnum.SkimmedNtuple)
    self._nov = retrieve_kw(kw, 'nov', -1)
    self._fList = expandFolders(csvStr2List(inputFiles))

    # Loading libraries
    loadStatus = ROOT.gSystem.Load('libTuningTools')
    if loadStatus < 0:
        self._fatal("Could not load TuningTools library", ImportError)

    self._containersSvc = {}
    self._storegateSvc = NotSet

    # Draw a random identifier after seeding the generator with the clock.
    from random import randrange, seed
    from time import time
    seed(time())
    self._id = randrange(100000)
conflict_handler='resolve') parser.make_adjustments() emptyArgumentsPrintHelp(parser) ## Retrieve parser args: args = parser.parse_args(namespace=LoggerNamespace()) mainLogger = Logger.getModuleLogger(__name__, args.output_level) mainLogger.debug("Raw input files are:") if mainLogger.isEnabledFor(LoggingLevel.DEBUG): pprint(args.inputFiles) ## Treat special arguments if len(args.inputFiles) == 1: args.inputFiles = csvStr2List(args.inputFiles[0]) args.inputFiles = expandFolders(args.inputFiles) mainLogger.verbose("All input files are:") if mainLogger.isEnabledFor(LoggingLevel.VERBOSE): pprint(args.inputFiles) if args.binFilters is not NotSet: try: args.binFilters = str_to_class("TuningTools.CrossValidStat", args.binFilters) args.binFilters = getFilters(args.binFilters, args.inputFiles, printf=mainLogger.info) except (TypeError, AttributeError): args.binFilters = csvStr2List(args.binFilters) args.inputFiles = select(args.inputFiles, args.binFilters) if len(args.binFilters) is 1: args.inputFiles = [args.inputFiles]
] # Et Bins etBins = [15, 20, 30, 40, 50, 500000] # Eta bins etaBins = [0, 0.8, 1.37, 1.54, 2.5] # [Tight, Medium, Loose and VeryLoose] thrRelax = [-0.1, -0.1, 0, 0] ####################### Extract Ringer Configuration ######################### import numpy as np outputDict = dict() for idx, tuningName in enumerate(tuningNameList): files = expandFolders(basepath + "/" + pathList[idx]) crossValGrid = [] for path in files: if path.endswith(".pic"): crossValGrid.append(path) pprint(crossValGrid) pprint(configList[idx]) pprint(refBenchmarkList[idx]) c = CrossValidStatAnalysis.exportDiscrFiles( crossValGrid, RingerOperation.L2, triggerChains=tuningName, refBenchCol=refBenchmarkList[idx], EtBins=etBins, EtaBins=etaBins,
import sys if len(sys.argv)==1: parser.print_help() sys.exit(1) args = parser.parse_args( namespace = LoggerNamespace() ) from RingerCore import Logger, LoggingLevel, save, load, expandFolders, traverse import numpy as np from TuningTools.coreDef import retrieve_npConstants npCurrent, _ = retrieve_npConstants() npCurrent.level = args.output_level logger = Logger.getModuleLogger( __name__, args.output_level ) files = expandFolders( args.inputs ) # FIXME *.npz from zipfile import BadZipfile for f in files: logger.info("Changing representation of file '%s'...", f) try: data = dict(load(f)) except BadZipfile, e: logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e)) continue logger.debug("Finished loading file '%s'...", f) for key in data: if key == 'W': for obj, idx, parent, _, _ in traverse(data[key], tree_types = (np.ndarray,), max_depth = 3):
required=False, default=4, help="The number of cores processor per job") import sys, os if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() defaultCore = os.environ["OMP_NUM_THREADS"] os.environ["OMP_NUM_THREADS"] = args.cores # Take all files fList = csvStr2List(args.fList) fList = expandFolders(fList) from TuningTools import GridJobFilter gridJobFilter = GridJobFilter() fList = gridJobFilter(fList) process_pipe = [] output_stack = [] import subprocess from pprint import pprint while len(fList) > 0: if len(process_pipe) < int(args.maxJobs): job_id = len(fList) f = fList.pop()
'ElectronHighEnergyVeryLooseConf', ] # Et Bins etBins = [15, 20, 30, 40, 50, 500000 ] # Eta bins etaBins = [0, 0.8 , 1.37, 1.54, 2.5] # [Tight, Medium, Loose and VeryLoose] thrRelax = [-0.1,-0.1,0,0] ####################### Extract Ringer Configuration ######################### import numpy as np outputDict=dict() for idx, tuningName in enumerate(tuningNameList): files = expandFolders(basepath+'/'+pathList[idx]) crossValGrid=[] for path in files: if path.endswith('.pic'): crossValGrid.append(path) pprint(crossValGrid) pprint(configList[idx]) pprint(refBenchmarkList[idx]) c = CrossValidStatAnalysis.exportDiscrFiles(crossValGrid, RingerOperation.L2, triggerChains=tuningName, refBenchCol=refBenchmarkList[idx], EtBins = etBins, EtaBins = etaBins, configCol=configList[idx])
import os
import shutil
import sys

# Without any argument there is nothing to do: show the help and leave.
if len(sys.argv) == 1:
    mainFilterParser.print_help()
    sys.exit(1)

mainLogger = Logger.getModuleLogger(__name__, LoggingLevel.INFO)
mainLogger.info('Start ntuple extraction...')

# retrieve args
args = mainFilterParser.parse_args()

# Treat special arguments
# A single entry may be a comma separated list of paths.
if len(args.inputFiles) == 1:
    args.inputFiles = csvStr2List(args.inputFiles[0])
args.inputFiles = expandFolders(args.inputFiles)
mainLogger.verbose("All input files are:")
pprint(args.inputFiles)

# A '*' in the output name is replaced by the trigger name without 'HLT_'.
if '*' in args.output:
    output = args.output.replace('*', args.trigger.replace('HLT_', ''))
else:
    output = args.output

# Copy the tree to an slim file
obj = CopyTree(output)
if obj(args.inputFiles, args.basepath, args.trigger, args.treename):
    obj.save()
elif os.path.exists(output):
    # The copy failed: remove the leftover output. The path comes from the
    # command line, so it is removed through the file-system API rather than
    # through a shell string, which would break on spaces and would
    # interpret shell metacharacters.
    try:
        if os.path.isdir(output) and not os.path.islink(output):
            shutil.rmtree(output)
        else:
            os.remove(output)
    except OSError as err:
        mainLogger.warning("Could not remove '%s': %s", output, err)
setrootcore_opts = '--grid --ncpus=1 --no-color;'.format( CORES=args.multi_thread.get()) tunedDataStr = "@input.csv" debug = (args.get_job_submission_option('debug') is not None) crossValStatAnalysis = '\$ROOTCOREBIN/user_scripts/TuningTools/standalone/crossValStatAnalysis.py' elif clusterManagerConf() in ( ClusterManager.PBS, ClusterManager.LSF, ): # if args.outputFileBase: args.outputFileBase = os.path.join(args.outputDir.args.outputFileBase) else: args.outputFileBase = os.path.join(args.outputDir, 'crossValStat') from itertools import repeat files = expandFolders(args.discrFiles) from TuningTools import MixedJobBinnedFilter ffilter = MixedJobBinnedFilter() jobFilters = ffilter(files) mainLogger.info('Found following filters: %r', jobFilters) jobFileCollection = select(files, jobFilters, popListInCaseOneItem=False) nFilesCollection = [len(l) for l in jobFileCollection] tunedDataStr, refStr, binFilterStr = args.discrFiles, args.refFile, '--binFilters {BIN_FILTER}' debug = args.test rootcorebin = os.environ.get('ROOTCOREBIN') crossValStatAnalysis = os.path.join( rootcorebin, 'user_scripts/TuningTools/standalone/crossValStatAnalysis.py') #setrootcore = 'source ' + os.path.join(rootcorebin, '../setrootcore.sh;') setrootcore = '' setrootcore_opts = ''
] parser = ArgumentParser() parser.add_argument('--inFolderList', nargs='+', required=True, help="Input container to retrieve data") parser.add_argument('--signalDS', action='store_true', help="Whether the dataset contains TPNtuple") parser.add_argument('--outfile', action='store', default="mergedOutput.root", help="Name of the output file") parser.add_argument('--triggerList', nargs='+', default=defaultTrigList, help="Trigger list to keep on the filtered file.") args = parser.parse_args() mainLogger = Logger.getModuleLogger(__name__, LoggingLevel.INFO) files = expandFolders(args.inFolderList) rFile = RootFile(files, args.outfile) rFile.dump('Offline/Egamma/Ntuple', ['electron']) rFile.dump('Trigger/HLT/Egamma/Ntuple', args.triggerList) if args.signalDS: rFile.dump('Trigger/HLT/Egamma/TPNtuple', args.triggerList) rFile.save()
def __call__(self, fList, ringerOperation, **kw):
    """
    Read ntuple and return patterns and efficiencies.

    Arguments:
    - fList: The file path or file list path. It can be an argument list of
      two types:
      o List: each element is a string path to the file;
      o Comma separated string: each path is separated via a comma
      o Folders: Expand folders recursively adding also files within them
        to analysis
    - ringerOperation: Set Operation type. It can be both a string or the
      RingerOperation

    Optional keyword arguments read by this method (defaults as coded):
    - filterType [FilterType.DoNotFilter]: whether to filter. Use FilterType
      enumeration
    - reference [Reference.AcceptAll]: set reference for targets. Use
      Reference enumeration
    - treePath ['ZeeCandidate']: name of the tree to chain from the files
    - l2EtCut [None]: cut compared against event.fcCand2_et when
      ringerOperation > 0
    - offEtCut [None]: Set Offline cluster energy cut value; a truthy value
      is multiplied by 1000 (put in MeV) before it is compared
    - nClusters [None]: Read up to nClusters. Use None to run for all
      clusters.
    - getRates [True]: whether the efficiency collectors are updated and
      returned
    - getRatesOnly [False]: in this method only used to force getRates to
      True when both are inconsistent
    - getTagsOnly [False]: use candidate index 1 (tags) instead of 2 (probes)
    - etBins [None]: E_T bins (GeV) where the data should be segmented
    - etaBins [None]: eta bins where the data should be segmented
    - ringConfig [100]: A list containing the number of rings available in
      the data for each eta bin.
    - crossVal [None]: Whether to measure benchmark efficiency splitting it
      by the crossVal-validation datasets
    - extractDet [None]: Which detector to export (use Detector
      enumeration). NOTE(review): no branch below handles None.
    - monitoring [None]: when not None it is forwarded to
      self.__fillHistograms for every accepted entry
    - pileupRef [NotSet]: pile-up reference; when NotSet, avgmu is used for
      ringerOperation > 0 and nvtx otherwise
    - level: when given it is assigned to self.level

    NOTE(review): l1EmClusCut, standardCaloVariables, useTRT and
    supportTriggers were documented here before but are not retrieved from
    kw in this method; they are left in kw when checkForUnusedVars is called.

    Returns a list [npObject, npBaseInfo], extended with
    (branchEffCollectors, branchCrossEffCollectors) when getRates is set.
    """
    # NOTE(review): __eventBranches and __monteCarloBranches are not
    # referenced again in this method.
    __eventBranches = [
        'EventNumber', 'RunNumber', 'RandomRunNumber', 'MCChannelNumber',
        'RandomLumiBlockNumber', 'MCPileupWeight', 'VertexZPosition',
        'Zcand_M', 'Zcand_pt', 'Zcand_eta', 'Zcand_phi', 'Zcand_y',
        'isTagTag'
    ]
    __trackBranches = [
        'elCand2_deltaeta1', 'elCand2_DeltaPOverP',
        'elCand2_deltaphiRescaled', 'elCand2_d0significance',
        'elCand2_trackd0pvunbiased', 'elCand2_eProbabilityHT'
    ]
    __monteCarloBranches = [
        'type',
        'origin',
        'originbkg',
        'typebkg',
        'isTruthElectronFromZ',
        'TruthParticlePdgId',
        'firstEgMotherPdgId',
        'TruthParticleBarcode',
        'firstEgMotherBarcode',
        'MotherPdgId',
        'MotherBarcode',
        'FirstEgMotherTyp',
        'FirstEgMotherOrigin',
        'dRPdgId',
    ]
    __onlineBranches = ['match', 'ringerMatch', 'ringer_rings']
    __offlineBranches = ['et', 'eta']
    # The current pid map used as offline reference
    pidConfigs = {
        key: value
        for key, value in RingerOperation.efficiencyBranches().iteritems()
        if key in (RingerOperation.Offline_LH_Tight,
                   RingerOperation.Offline_LH_Medium,
                   RingerOperation.Offline_LH_Loose,
                   RingerOperation.Offline_LH_VeryLoose)
    }
    # Retrieve information from keyword arguments
    filterType = retrieve_kw(kw, 'filterType', FilterType.DoNotFilter)
    reference = retrieve_kw(kw, 'reference', Reference.AcceptAll)
    offEtCut = retrieve_kw(kw, 'offEtCut', None)
    l2EtCut = retrieve_kw(kw, 'l2EtCut', None)
    treePath = retrieve_kw(kw, 'treePath', 'ZeeCandidate')
    nClusters = retrieve_kw(kw, 'nClusters', None)
    etBins = retrieve_kw(kw, 'etBins', None)
    etaBins = retrieve_kw(kw, 'etaBins', None)
    crossVal = retrieve_kw(kw, 'crossVal', None)
    ringConfig = retrieve_kw(kw, 'ringConfig', 100)
    monitoring = retrieve_kw(kw, 'monitoring', None)
    pileupRef = retrieve_kw(kw, 'pileupRef', NotSet)
    getRates = retrieve_kw(kw, 'getRates', True)
    getRatesOnly = retrieve_kw(kw, 'getRatesOnly', False)
    getTagsOnly = retrieve_kw(kw, 'getTagsOnly', False)
    extractDet = retrieve_kw(kw, 'extractDet', None)
    import ROOT
    #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
    #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
    if ROOT.gSystem.Load('libTuningTools') < 0:
        self._fatal("Could not load TuningTools library", ImportError)
    if 'level' in kw:
        self.level = kw.pop('level')
    # and delete it to avoid mistakes:
    checkForUnusedVars(kw, self._warning)
    del kw
    ### Parse arguments
    # Also parse operation, check if its type is string and if we can
    # transform it to the known operation enum:
    fList = csvStr2List(fList)
    fList = expandFolders(fList)
    ringerOperation = RingerOperation.retrieve(ringerOperation)
    reference = Reference.retrieve(reference)
    # Offline E_T cut
    if offEtCut:
        offEtCut = 1000. * offEtCut  # Put energy in MeV
    # Check whether using bins
    useBins = False
    useEtBins = False
    useEtaBins = False
    nEtaBins = 1
    nEtBins = 1
    # Missing bins become empty arrays, so that `.size` can be used as the
    # "bins enabled" test below.
    if etaBins is None:
        etaBins = npCurrent.fp_array([])
    if type(etaBins) is list:
        etaBins = npCurrent.fp_array(etaBins)
    if etBins is None:
        etBins = npCurrent.fp_array([])
    if type(etBins) is list:
        etBins = npCurrent.fp_array(etBins)
    if etBins.size:
        etBins = etBins * 1000.  # Put energy in MeV
        nEtBins = len(etBins) - 1
        if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
            self._fatal((
                'Number of et bins (%d) is larger or equal than maximum '
                'integer precision can hold (%d). Increase '
                'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
            ), nEtBins, np.iinfo(npCurrent.scounter_dtype).max)
        # Flag that we are separating data through bins
        useBins = True
        useEtBins = True
        self._debug('E_T bins enabled.')
    # NOTE(review): the conditional expression below applies to the whole
    # right-hand side, so without eta bins ringConfig becomes the int 1 and
    # the later len(ringConfig) cannot work on it. Confirm whether
    # `[ringConfig] * (len(etaBins) - 1 if etaBins.size else 1)` was meant.
    if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
        ringConfig = [ringConfig] * (len(etaBins) - 1) if etaBins.size else 1
    if type(ringConfig) is list:
        ringConfig = npCurrent.int_array(ringConfig)
    if not len(ringConfig):
        self._fatal('Rings size must be specified.')
    if etaBins.size:
        nEtaBins = len(etaBins) - 1
        if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
            self._fatal((
                'Number of eta bins (%d) is larger or equal than maximum '
                'integer precision can hold (%d). Increase '
                'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
            ), nEtaBins, np.iinfo(npCurrent.scounter_dtype).max)
        if len(ringConfig) != nEtaBins:
            self._fatal((
                'The number of rings configurations (%r) must be equal than '
                'eta bins (%r) region config'), ringConfig, etaBins)
        useBins = True
        useEtaBins = True
        self._debug('eta bins enabled.')
    else:
        self._debug('eta/et bins disabled.')
    # The base information holder, such as et, eta and pile-up
    if pileupRef is NotSet:
        if ringerOperation > 0:
            pileupRef = PileupReference.avgmu
        else:
            pileupRef = PileupReference.nvtx
    pileupRef = PileupReference.retrieve(pileupRef)
    self._info("Using '%s' as pile-up reference.",
               PileupReference.tostring(pileupRef))
    # Candidates: (1) is tags and (2) is probes. Default is probes
    self._candIdx = 1 if getTagsOnly else 2
    # Mutual exclusive arguments:
    if not getRates and getRatesOnly:
        self._logger.error(
            "Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True."
        )
        getRates = True
    ### Prepare to loop:
    t = ROOT.TChain(treePath)
    for inputFile in progressbar(fList,
                                 len(fList),
                                 logger=self._logger,
                                 prefix="Creating collection tree "):
        # Check if file exists
        f = ROOT.TFile.Open(inputFile, 'read')
        if not f or f.IsZombie():
            # NOTE(review): the two adjacent literals concatenate to
            # "Couldnt open file: %s" (no apostrophe).
            self._warning('Couldn' 't open file: %s', inputFile)
            continue
        # Inform user whether TTree exists, and which options are available:
        self._debug("Adding file: %s", inputFile)
        obj = f.Get(treePath)
        if not obj:
            self._warning("Couldn't retrieve TTree (%s)!", treePath)
            self._info("File available info:")
            f.ReadAll()
            f.ReadKeys()
            f.ls()
            continue
        elif not isinstance(obj, ROOT.TTree):
            self._fatal("%s is not an instance of TTree!", treePath,
                        ValueError)
        t.Add(inputFile)
    # Turn all branches off.
    t.SetBranchStatus("*", False)
    # The SkimmedNtuple object is passed to __setBranchAddress together with
    # every branch enabled below
    event = ROOT.SkimmedNtuple()
    # Ready to retrieve the total number of events
    t.GetEntry(0)
    ## Allocating memory for the number of entries
    entries = t.GetEntries()
    # nClusters only limits the allocation when it lies within [1, entries]
    nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
        else nClusters
    ## Retrieve the dependent operation variables:
    # NOTE(review): etBranch/etaBranch are only assigned when the respective
    # bins are enabled, but both are used unconditionally for BaseInfo below.
    if useEtBins:
        etBranch = ('elCand%d_et') % (
            self._candIdx) if ringerOperation < 0 else ('fcCand%d_et') % (
                self._candIdx)
        self.__setBranchAddress(t, etBranch, event)
        self._debug("Added branch: %s", etBranch)
        npEt = npCurrent.scounter_zeros(
            shape=npCurrent.shape(npat=1, nobs=nobs))
        self._debug("Allocated npEt with size %r", npEt.shape)
    if useEtaBins:
        etaBranch = ('elCand%d_eta') % (
            self._candIdx) if ringerOperation < 0 else ('fcCand%d_eta') % (
                self._candIdx)
        self.__setBranchAddress(t, etaBranch, event)
        self._debug("Added branch: %s", etaBranch)
        npEta = npCurrent.scounter_zeros(
            shape=npCurrent.shape(npat=1, nobs=nobs))
        self._debug("Allocated npEta with size %r", npEta.shape)
    if reference is Reference.Truth:
        self.__setBranchAddress(t, ('elCand%d_isTruthElectronFromZ') %
                                (self._candIdx), event)
    for var in __offlineBranches:
        self.__setBranchAddress(t,
                                ('elCand%d_%s') % (self._candIdx, var),
                                event)
    #for var in pidConfigs.values():
    #  self.__setBranchAddress(t,var,event)
    for var in __trackBranches:
        self.__setBranchAddress(t, var, event)
    # Add online branches if using Trigger
    if ringerOperation > 0:
        for var in __onlineBranches:
            self.__setBranchAddress(t,
                                    ('fcCand%d_%s') % (self._candIdx, var),
                                    event)
    else:
        self.__setBranchAddress(t, ('elCand%d_%s') %
                                (self._candIdx, 'ringer_rings'), event)
    if pileupRef is PileupReference.nvtx:
        pileupBranch = 'Nvtx'
        pileupDataType = np.uint16
    elif pileupRef is PileupReference.avgmu:
        pileupBranch = 'averageIntPerXing'
        pileupDataType = np.float32
    else:
        raise NotImplementedError(
            "Pile-up reference %r is not implemented." % pileupRef)
    #for var in __eventBranches +
    for var in [pileupBranch]:
        self.__setBranchAddress(t, var, event)
    ### Allocate memory
    # NOTE(review): there is no fallback branch, so npat stays unassigned
    # when extractDet matches none of the cases (including the default None).
    if extractDet == (Detector.Calorimetry):
        npat = ringConfig.max()
    elif extractDet == (Detector.Tracking):
        npat = len(__trackBranches)
    # NOTE: Check if pat is correct for both Calo and track data
    elif extractDet in (Detector.CaloAndTrack, Detector.All):
        npat = ringConfig.max() + len(__trackBranches)
    npPatterns = npCurrent.fp_zeros(shape=npCurrent.shape(
        npat=npat,  #getattr(event, ringerBranch).size()
        nobs=nobs))
    self._debug("Allocated npPatterns with size %r", npPatterns.shape)
    baseInfoBranch = BaseInfo(
        (etBranch, etaBranch, pileupBranch),
        (npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType))
    baseInfo = [
        None,
    ] * baseInfoBranch.nInfo
    # Add E_T, eta and luminosity information
    # NOTE(review): these arrays are allocated here but no statement in this
    # method writes the per-entry values into them.
    npBaseInfo = [
        npCurrent.zeros(shape=npCurrent.shape(npat=1, nobs=nobs),
                        dtype=baseInfoBranch.dtype(idx))
        for idx in baseInfoBranch
    ]
    from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
    branchEffCollectors = OrderedDict()
    branchCrossEffCollectors = OrderedDict()
    # Efficiency benchmarks are only collected for ringerOperation < 0: the
    # negative-key, non-sequence entries of efficiencyBranches(), sorted by key
    if ringerOperation < 0:
        from operator import itemgetter
        benchmarkDict = OrderedDict(
            sorted([(key, value) for key, value in
                    RingerOperation.efficiencyBranches().iteritems()
                    if key < 0 and not (isinstance(value, (list, tuple)))],
                   key=itemgetter(0)))
    else:
        benchmarkDict = OrderedDict()
    for key, val in benchmarkDict.iteritems():
        branchEffCollectors[key] = list()
        branchCrossEffCollectors[key] = list()
        # Add efficiency branch:
        if ringerOperation < 0:
            self.__setBranchAddress(t, val, event)
        for etBin in range(nEtBins):
            # NOTE(review): the per-E_T sub-list is only created when bins
            # are used, while the append below always indexes [etBin].
            if useBins:
                branchEffCollectors[key].append(list())
                branchCrossEffCollectors[key].append(list())
            for etaBin in range(nEtaBins):
                etBinArg = etBin if useBins else -1
                etaBinArg = etaBin if useBins else -1
                argList = [
                    RingerOperation.tostring(key), val, etBinArg, etaBinArg
                ]
                branchEffCollectors[key][etBin].append(
                    BranchEffCollector(*argList))
                if crossVal:
                    branchCrossEffCollectors[key][etBin].append(
                        BranchCrossEffCollector(entries, crossVal,
                                                *argList))
            # etBin
        # etaBin
    # benchmark dict
    if self._logger.isEnabledFor(LoggingLevel.DEBUG):
        self._debug(
            'Retrieved following branch efficiency collectors: %r', [
                collector[0].printName
                for collector in traverse(branchEffCollectors.values())
            ])
    etaBin = 0
    etBin = 0
    step = int(entries / 100) if int(entries / 100) > 0 else 1
    ## Start loop!
    self._info("There is available a total of %d entries.", entries)
    # cPos is the write position in the output arrays; it only advances for
    # accepted entries
    cPos = 0
    ### Loop over entries
    for entry in progressbar(range(entries),
                             entries,
                             step=step,
                             logger=self._logger,
                             prefix="Looping over entries "):
        self._verbose('Processing eventNumber: %d/%d', entry, entries)
        t.GetEntry(entry)
        #print self.__getEt(event)
        # NOTE(review): offEtCut and l2EtCut default to None; the cuts below
        # compare numbers against None in that case - confirm this is
        # intended.
        if event.elCand2_et < offEtCut:
            self._debug(
                "Ignoring entry due to offline E_T cut. E_T = %1.3f < %1.3f MeV",
                event.elCand2_et, offEtCut)
            continue
        # Add et distribution for all events
        if ringerOperation > 0:
            if event.fcCand2_et < l2EtCut:
                self._debug("Ignoring entry due Fast Calo E_T cut.")
                continue
            # Add et distribution for all events
        # Set discriminator target:
        target = Target.Unknown
        # Monte Carlo cuts
        if reference is Reference.Truth:
            if getattr(event, ('elCand%d_isTruthElectronFromZ') %
                       (self._candIdx)):
                target = Target.Signal
            elif not getattr(event, ('elCand%d_isTruthElectronFromZ') %
                             (self._candIdx)):
                target = Target.Background
        # Offline Likelihood cuts
        elif reference is Reference.Off_Likelihood:
            if getattr(event, pidConfigs[RingerOperation.Offline_LH_Tight]):
                target = Target.Signal
            elif not getattr(
                    event,
                    pidConfigs[RingerOperation.Offline_LH_VeryLoose]):
                target = Target.Background
        # By pass everything (Default)
        elif reference is Reference.AcceptAll:
            target = Target.Signal if filterType is FilterType.Signal else Target.Background
        # Run filter if it is defined
        if filterType and \
           ( (filterType is FilterType.Signal and target != Target.Signal) or \
             (filterType is FilterType.Background and target != Target.Background) or \
             (target == Target.Unknown) ):
            #self._verbose("Ignoring entry due to filter cut.")
            continue
        ## Retrieve base information and rings:
        for idx in baseInfoBranch:
            lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
            baseInfo[idx] = lInfo
        # Retrieve dependent operation region
        if useEtBins:
            etBin = self.__retrieveBinIdx(etBins, baseInfo[0])
        if useEtaBins:
            etaBin = self.__retrieveBinIdx(etaBins, np.fabs(baseInfo[1]))
        # Check if bin is within range (when not using bins, this will always be true):
        if (etBin < nEtBins and etaBin < nEtaBins):
            if useEtBins:
                npEt[cPos] = etBin
            if useEtaBins:
                npEta[cPos] = etaBin
            # Online operation
            # cPat is the next free pattern index for the current entry
            cPat = 0
            caloAvailable = True
            if ringerOperation > 0 and self.__get_ringer_onMatch(
                    event) < 1:
                continue
            # TODO Treat case where we don't use rings energy
            # Check if the rings empty
            if self.__get_rings_energy(event, ringerOperation).empty():
                self._debug(
                    'No rings available in this event. Skipping...')
                caloAvailable = False
            # Retrieve rings:
            if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack,
                              Detector.All):
                if caloAvailable:
                    try:
                        pass
                        patterns = stdvector_to_list(
                            self.__get_rings_energy(
                                event, ringerOperation))
                        lPat = len(patterns)
                        if lPat == ringConfig[etaBin]:
                            npPatterns[npCurrent.access(
                                pidx=slice(cPat, ringConfig[etaBin]),
                                oidx=cPos)] = patterns
                        else:
                            # The number of rings does not match this eta
                            # bin: try the neighbouring bins whose
                            # configuration matches the size read.
                            oldEtaBin = etaBin
                            if etaBin > 0 and ringConfig[etaBin -
                                                         1] == lPat:
                                etaBin -= 1
                            elif etaBin + 1 < len(
                                    ringConfig) and ringConfig[etaBin +
                                                               1] == lPat:
                                etaBin += 1
                            npPatterns[npCurrent.access(
                                pidx=slice(cPat, ringConfig[etaBin]),
                                oidx=cPos)] = patterns
                            self._warning((
                                "Recovered event which should be within eta bin (%d: %r) "
                                "but was found to be within eta bin (%d: %r). "
                                "Its read eta value was of %f."),
                                          oldEtaBin,
                                          etaBins[oldEtaBin:oldEtaBin + 2],
                                          etaBin,
                                          etaBins[etaBin:etaBin + 2],
                                          np.fabs(getattr(
                                              event, etaBranch)))
                    except ValueError:
                        self._logger.error((
                            "Patterns size (%d) do not match expected "
                            "value (%d). This event eta value is: %f, and ringConfig is %r."
                        ), lPat, ringConfig[etaBin], np.fabs(
                            getattr(event, etaBranch)), ringConfig)
                        continue
                    cPat += ringConfig[etaBin]
                else:
                    # Also display warning when extracting only calorimetry!
                    self._warning("Rings not available")
                    continue
            if extractDet in (Detector.Tracking, Detector.CaloAndTrack,
                              Detector.All):
                for var in __trackBranches:
                    npPatterns[npCurrent.access(pidx=cPat,
                                                oidx=cPos)] = getattr(
                                                    event, var)
                    if var == 'elCand2_eProbabilityHT':
                        # The stored value is clamped into the open interval
                        # (0, 1) and replaced by
                        # -(1/tau) * log(1/value - 1).
                        from math import log
                        TRT_PID = npPatterns[npCurrent.access(pidx=cPat,
                                                              oidx=cPos)]
                        epsilon = 1e-99
                        if TRT_PID >= 1.0:
                            TRT_PID = 1.0 - 1.e-15
                        elif TRT_PID <= 0.0:
                            TRT_PID = epsilon
                        tau = 15.0
                        TRT_PID = -(1 / tau) * log((1.0 / TRT_PID) - 1.0)
                        npPatterns[npCurrent.access(pidx=cPat,
                                                    oidx=cPos)] = TRT_PID
                    cPat += 1
            ## Retrieve rates information:
            if getRates and ringerOperation < 0:
                # Each isEM word is replaced by a boolean which is True when
                # none of the bits selected by the mask is set.
                #event.elCand2_isEMVerLoose2015 = not( event.elCand2_isEMVeryLoose2015 & 34896 )
                event.elCand2_isEMLoose2015 = not (
                    event.elCand2_isEMLoose2015 & 34896)
                event.elCand2_isEMMedium2015 = not (
                    event.elCand2_isEMMedium2015 & 276858960)
                event.elCand2_isEMTight2015 = not (
                    event.elCand2_isEMTight2015 & 281053264)
                for branch in branchEffCollectors.itervalues():
                    if not useBins:
                        branch.update(event)
                    else:
                        branch[etBin][etaBin].update(event)
                if crossVal:
                    for branchCross in branchCrossEffCollectors.itervalues(
                    ):
                        if not useBins:
                            branchCross.update(event)
                        else:
                            branchCross[etBin][etaBin].update(event)
            # end of (getRates)
            if not monitoring is None:
                self.__fillHistograms(monitoring, filterType, pileupRef,
                                      pidConfigs, event)
            # We only increment if this cluster will be computed
            cPos += 1
        # end of (et/eta bins)
        # Limit the number of entries to nClusters if desired and possible:
        if not nClusters is None and cPos >= nClusters:
            break
    # for end
    ## Treat the rings information
    ## Remove not filled reserved memory space:
    if npPatterns.shape[npCurrent.odim] > cPos:
        npPatterns = np.delete(npPatterns,
                               slice(cPos, None),
                               axis=npCurrent.odim)
    ## Segment data over bins regions:
    # Also remove not filled reserved memory space:
    if useEtBins:
        npEt = npCurrent.delete(npEt, slice(cPos, None))
    if useEtaBins:
        npEta = npCurrent.delete(npEta, slice(cPos, None))
    # Treat
    # NOTE(review): npEt/npEta are only defined when the respective bins are
    # enabled, yet both are passed to treatNpInfo unconditionally.
    standardCaloVariables = False
    npObject = self.treatNpInfo(
        cPos,
        npEt,
        npEta,
        useEtBins,
        useEtaBins,
        nEtBins,
        nEtaBins,
        standardCaloVariables,
        ringConfig,
        npPatterns,
    )
    data = [
        self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins,
                         nEtaBins, standardCaloVariables, ringConfig, npData)
        for npData in npBaseInfo
    ]
    npBaseInfo = npCurrent.array(data, dtype=np.object)
    if getRates:
        if crossVal:
            for etBin in range(nEtBins):
                for etaBin in range(nEtaBins):
                    for branchCross in branchCrossEffCollectors.itervalues(
                    ):
                        if not useBins:
                            branchCross.finished()
                        else:
                            branchCross[etBin][etaBin].finished()
        # Print efficiency for each one for the efficiency branches analysed:
        for etBin in range(nEtBins) if useBins else range(1):
            for etaBin in range(nEtaBins) if useBins else range(1):
                for branch in branchEffCollectors.itervalues():
                    lBranch = branch if not useBins else branch[etBin][
                        etaBin]
                    self._info('%s', lBranch)
                if crossVal:
                    for branchCross in branchCrossEffCollectors.itervalues(
                    ):
                        lBranchCross = branchCross if not useBins else branchCross[
                            etBin][etaBin]
                        lBranchCross.dump(self._debug,
                                          printSort=True,
                                          sortFcn=self._verbose)
                # for branch
            # for eta
        # for et
    else:
        branchEffCollectors = None
        branchCrossEffCollectors = None
    # end of (getRates)
    outputs = []
    outputs.extend((npObject, npBaseInfo))
    if getRates:
        outputs.extend((branchEffCollectors, branchCrossEffCollectors))
    return outputs
referenceBenchCol = [['Pd','SP','Pf'], ['Pd','SP','Pf'], ['Pd','SP','Pf']] # Et Bins etBins = [ 20, 30, 40, 50, 500000 ] # Eta bins etaBins = [ 0, 0.8 , 1.37, 1.54, 2.5 ] # [Tight, Medium, Loose and VeryLoose] thrRelax = [0,0,0,0] ####################### Extract Ringer Configuration ######################### import numpy as np for path, referenceBench, configCol in zip(pathList, referenceBenchCol, configList): files = expandFolders( os.path.join( basepath, path ), '*.pic') for conf, ref in zip(configCol, referenceBench): refBenchmark = [[ref] * len(conf)]*len(conf[0]) c = CrossValidStatAnalysis.exportDiscrFiles( sorted(files) , RingerOperation.Offline , refBenchCol = ref , EtBins = etBins , EtaBins = etaBins , configCol = conf #, level = LoggingLevel.VERBOSE ) ###########################################################################
referenceBenchCol = [['Pd','SP','Pf'], ['Pd','SP','Pf'], ['Pd','SP','Pf']] # Et Bins etBins = [ 20, 30, 40, 50, 500000 ] # Eta bins etaBins = [ 0, 0.8 , 1.37, 1.54, 2.5 ] # [Tight, Medium, Loose and VeryLoose] thrRelax = [0,0,0,0] ####################### Extract Ringer Configuration ######################### import numpy as np for path, referenceBench, configCol in zip(pathList, referenceBenchCol, configList): files = expandFolders( os.path.join( basepath, path ), '*.pic.gz') for conf, ref in zip(configCol, referenceBench): refBenchmark = [[ref] * len(conf)]*len(conf[0]) c = CrossValidStatAnalysis.exportDiscrFiles( sorted(files) , RingerOperation.Offline , refBenchCol = ref , EtBins = etBins , EtaBins = etaBins , configCol = conf #, level = LoggingLevel.VERBOSE ) ###########################################################################
'ElectronHighEnergyTightConf', 'ElectronHighEnergyMediumConf', 'ElectronHighEnergyLooseConf', 'ElectronHighEnergyVeryLooseConf', ] # Et Bins etBins = [3, 7, 10, 15] # Eta bins etaBins = [0, 0.8, 1.37, 1.54, 2.37, 2.5] ####################### Extract Ringer Configuration ######################### import numpy as np outputDict = dict() files = expandFolders(basepath) crossValGrid = [] for path in files: if path.endswith('.pic.gz'): crossValGrid.append(path) pprint(crossValGrid) pprint(configList[0]) pprint(refBenchmarkList[0]) d = CrossValidStatAnalysis.exportDiscrFiles(crossValGrid, RingerOperation.L2, triggerChains=tuningNameList[0], refBenchCol=refBenchmarkList[0], nEtBins=len(etBins), nEtaBins=len(etaBins), configCol=configList[0])
action='store', metavar='INPUT', nargs='+', help="Files to change representation") emptyArgumentsPrintHelp(parser) args = parser.parse_args(namespace=LoggerNamespace()) from RingerCore import Logger, LoggingLevel, save, load, expandFolders, traverse import numpy as np from TuningTools.coreDef import npCurrent npCurrent.level = args.output_level logger = Logger.getModuleLogger(__name__, args.output_level) files = expandFolders(args.inputs) # FIXME *.npz from zipfile import BadZipfile for f in files: logger.info("Changing representation of file '%s'...", f) try: data = dict(load(f)) except BadZipfile, e: logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e)) continue logger.debug("Finished loading file '%s'...", f) for key in data: if key == 'W': for obj, idx, parent, _, _ in traverse(data[key], tree_types=(np.ndarray, ), max_depth=3):
#etBins = [0, 30, 40, 50, 100000 ] #etaBins = [0, 0.8 , 1.37, 1.54, 2.5] etBins = [0, 30] etaBins = [0, 0.8] from TuningTools import CrossValidArchieve with CrossValidArchieve( crossValPath ) as CVArchieve: crossVal = CVArchieve del CVArchieve from TuningTools import createData from TuningTools import Reference, RingerOperation from RingerCore import expandFolders createData( sgnFileList = expandFolders( basePath+'/'+sgnInputFile ), bkgFileList = expandFolders( basePath+'/'+bkgInputFile ), ringerOperation = RingerOperation.EFCalo, referenceSgn = Reference.Off_Likelihood, referenceBkg = Reference.Truth, treePath = treePath, output = outputFile, l1EmClusCut = 20, l2EtCut = 19, efEtCut = 24, #offEtCut = 24, #nClusters = 50, #getRatesOnly = args.getRatesOnly, etBins = etBins, etaBins = etaBins, #ringConfig = args.ringConfig
# NOTE(review): nesting below was reconstructed from a whitespace-collapsed
# source -- confirm against the original script before merging.
mainLogger = Logger.getModuleLogger(__name__)

import sys
# Running the script without any argument only prints the usage.
if len(sys.argv) == 1:
  parser.print_help()
  sys.exit(1)

## Retrieve parser args:
args = parser.parse_args(namespace=LoggerNamespace())

mainLogger.debug("Raw input files are:")
if mainLogger.isEnabledFor(LoggingLevel.DEBUG):
  pprint(args.inputFiles)

## Treat special arguments
# A single positional value may be a comma separated list of paths.
if len(args.inputFiles) == 1:
  args.inputFiles = csvStr2List(args.inputFiles[0])

args.inputFiles = expandFolders(args.inputFiles)
mainLogger.verbose("All input files are:")
if mainLogger.isEnabledFor(LoggingLevel.VERBOSE):
  pprint(args.inputFiles)

if args.binFilters is not NotSet:
  try:
    # First try to interpret the option as the name of a filter class ...
    args.binFilters = str_to_class("TuningTools.CrossValidStat", args.binFilters)
    args.binFilters = getFilters(args.binFilters, args.inputFiles,
                                 printf=mainLogger.info)
  except TypeError:
    # ... and fall back to a comma separated list of filter strings.
    args.binFilters = csvStr2List(args.binFilters)
  args.inputFiles = select(args.inputFiles, args.binFilters)
  # Compare by value: `is 1` only worked through CPython's small-integer
  # cache and raises a SyntaxWarning on recent interpreters.
  if len(args.binFilters) == 1:
    args.inputFiles = [args.inputFiles]
else:
  # No filters: treat all the input files as one single group.
  args.inputFiles = [args.inputFiles]
# Running the script without any argument only prints the usage.
if len(sys.argv) == 1:
  parser.print_help()
  sys.exit(1)

args = parser.parse_args(namespace=LoggerNamespace())

from RingerCore import Logger, LoggingLevel, save, load, expandFolders, traverse
import numpy as np
from TuningTools import retrieve_npConstants, fixPPCol
npCurrent, _ = retrieve_npConstants()
npCurrent.level = args.output_level
logger = Logger.getModuleLogger(__name__, args.output_level)

files = expandFolders(args.inputs)

from zipfile import BadZipfile
from copy import deepcopy
for f in files:
  logger.info("Turning numpy matrix file '%s' into pre-processing file...", f)
  # Split the path into its containing folder (with trailing slash) and the
  # bare file name.
  fileparts = f.split("/")
  folder = "/".join(fileparts[0:-1]) + "/"
  fname = fileparts[-1]
  try:
    data = dict(load(f))
  except BadZipfile as e:
    # `except X as e` is accepted by Python 2.6+ and Python 3, unlike the
    # former `except X, e` spelling. Unreadable archives are skipped.
    logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e))
    continue
  logger.debug("Finished loading file '%s'...", f)
parents=[mainParser, loggerParser], conflict_handler='resolve') parser.make_adjustments() emptyArgumentsPrintHelp(parser) ## Retrieve parser args: args = parser.parse_args(namespace=LoggerNamespace()) mainLogger.setLevel(args.output_level) if mainLogger.isEnabledFor(LoggingLevel.DEBUG): from pprint import pprint pprint(args.inputFiles) ## Treat special arguments if len(args.inputFiles) == 1: args.inputFiles = csvStr2List(args.inputFiles[0]) args.inputFiles = expandFolders(args.inputFiles) mainLogger.verbose("All input files are:") if mainLogger.isEnabledFor(LoggingLevel.VERBOSE): pprint(args.inputFiles) for inFile in progressbar(args.inputFiles, len(args.inputFiles), logger=mainLogger, prefix="Processing files "): # Treat output file name: from RingerCore import checkExtension, changeExtension, load, save if checkExtension(inFile, "tgz|tar.gz|pic"): cOutputName = changeExtension(inFile, '.mat') if args.change_output_folder: import os.path cOutputName = os.path.join(
def __call__( self, fList, ringerOperation, **kw):
  """
    Read ntuple and return patterns and efficiencies.

    Arguments:
      - fList: The file path or file list path. It can be an argument list of
        two types:
          o List: each element is a string path to the file;
          o Comma separated string: each path is separated via a comma
          o Folders: Expand folders recursively adding also files within them
            to analysis
      - ringerOperation: Set Operation type. It can be both a string or the
        RingerOperation

    Optional arguments:
      - filterType [FilterType.DoNotFilter]: whether to filter. Use FilterType
        enumeration
      - reference [Truth]: set reference for targets. Use Reference enumeration
      - treePath [Set using operation]: set tree name on file, this may be set
        to use different sources then the default.
          Default for:
            o Offline: Offline/Egamma/Ntuple/electron
            o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH
      - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the
        trigger
      - l2EtCut [None]: Set L2 cluster energy cut value if operating on the
        trigger
      - efEtCut [None]: Set EF calo energy cut value if operating on the
        trigger. Entries accepted by L2Calo are dropped when any EF calo
        object is below it.
      - offEtCut [None]: Set Offline cluster energy cut value
      - nClusters [None]: Read up to nClusters. Use None to run for all
        clusters.
      - getRates [True]: Whether to fill the benchmark efficiency collectors.
        It is forced to True when getRatesOnly is set.
      - getRatesOnly [False]: Do not extract patterns nor base information,
        only fill the benchmark efficiency collectors.
      - etBins [None]: E_T bins (GeV) where the data should be segmented
      - etaBins [None]: eta bins where the data should be segmented
      - ringConfig [100]: A list containing the number of rings available in
        the data for each eta bin.
      - crossVal [None]: Whether to measure benchmark efficiency splitting it
        by the crossVal-validation datasets
      - extractDet [None]: Which detector to export (use Detector
        enumeration).
        Defaults are:
          o L2Calo: Calorimetry
          o L2: Tracking
          o Offline: Calorimetry
          o Others: CaloAndTrack
      - standardCaloVariables [False]: Whether to extract the standard
        calorimeter variables instead of the rings.
      - useTRT [False]: Whether to export TRT information when dumping track
        variables.
      - supportTriggers [True]: Whether reading data comes from support
        triggers
      - monitoring [None]: When not None, it is handed to the histogram
        filling helper before and after the event selection.
      - pileupRef [NotSet]: Pile-up reference (use PileupReference
        enumeration). When not set, avgmu is used for trigger operations and
        nvtx otherwise.
      - level: When present, it is assigned to this object's `level`.

    The energy cuts are multiplied by 1000 before being compared with the
    ntuple values (MeV).

    Returns a list with, in this order: the patterns object, the base
    information object, the branch efficiency collectors and the branch
    cross-validation efficiency collectors.
  """
  # Offline information branches:
  __offlineBranches = ['el_et',
                       'el_eta',
                       #'el_loose',
                       #'el_medium',
                       #'el_tight',
                       'el_lhLoose',
                       'el_lhMedium',
                       'el_lhTight',
                       'mc_hasMC',
                       'mc_isElectron',
                       'mc_hasZMother',
                       'el_nPileupPrimaryVtx',
                       ]
  # Online information branches
  __onlineBranches = []
  __l2stdCaloBranches = ['trig_L2_calo_et',
                         'trig_L2_calo_eta',
                         'trig_L2_calo_phi',
                         'trig_L2_calo_e237', # rEta
                         'trig_L2_calo_e277', # rEta
                         'trig_L2_calo_fracs1', # F1: fraction sample 1
                         'trig_L2_calo_weta2', # weta2
                         'trig_L2_calo_ehad1', # energy on hadronic sample 1
                         'trig_L2_calo_emaxs1', # eratio
                         'trig_L2_calo_e2tsts1', # eratio
                         'trig_L2_calo_wstot',] # wstot
  __l2trackBranches = [ # Do not add non patter variables on this branch list
                       #'trig_L2_el_pt',
                       #'trig_L2_el_eta',
                       #'trig_L2_el_phi',
                       #'trig_L2_el_caloEta',
                       #'trig_L2_el_charge',
                       #'trig_L2_el_nTRTHits',
                       #'trig_L2_el_nTRTHiThresholdHits',
                       'trig_L2_el_etOverPt',
                       'trig_L2_el_trkClusDeta',
                       'trig_L2_el_trkClusDphi',]
  # Retrieve information from keyword arguments
  filterType            = retrieve_kw(kw, 'filterType',            FilterType.DoNotFilter )
  reference             = retrieve_kw(kw, 'reference',             Reference.Truth        )
  l1EmClusCut           = retrieve_kw(kw, 'l1EmClusCut',           None                   )
  l2EtCut               = retrieve_kw(kw, 'l2EtCut',               None                   )
  efEtCut               = retrieve_kw(kw, 'efEtCut',               None                   )
  offEtCut              = retrieve_kw(kw, 'offEtCut',              None                   )
  treePath              = retrieve_kw(kw, 'treePath',              None                   )
  nClusters             = retrieve_kw(kw, 'nClusters',             None                   )
  getRates              = retrieve_kw(kw, 'getRates',              True                   )
  getRatesOnly          = retrieve_kw(kw, 'getRatesOnly',          False                  )
  etBins                = retrieve_kw(kw, 'etBins',                None                   )
  etaBins               = retrieve_kw(kw, 'etaBins',               None                   )
  crossVal              = retrieve_kw(kw, 'crossVal',              None                   )
  ringConfig            = retrieve_kw(kw, 'ringConfig',            100                    )
  extractDet            = retrieve_kw(kw, 'extractDet',            None                   )
  standardCaloVariables = retrieve_kw(kw, 'standardCaloVariables', False                  )
  useTRT                = retrieve_kw(kw, 'useTRT',                False                  )
  supportTriggers       = retrieve_kw(kw, 'supportTriggers',       True                   )
  monitoring            = retrieve_kw(kw, 'monitoring',            None                   )
  pileupRef             = retrieve_kw(kw, 'pileupRef',             NotSet                 )

  import ROOT, pkgutil
  #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
  #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
  # Either one of the two library names is enough.
  if not( bool( pkgutil.find_loader( 'libTuningTools' ) ) and ROOT.gSystem.Load('libTuningTools') >= 0 ) and \
     not( bool( pkgutil.find_loader( 'libTuningToolsLib' ) ) and ROOT.gSystem.Load('libTuningToolsLib') >= 0 ):
      #ROOT.gSystem.Load('libTuningToolsPythonLib') < 0:
    self._fatal("Could not load TuningTools library", ImportError)

  if 'level' in kw: self.level = kw.pop('level')
  # and delete it to avoid mistakes:
  checkForUnusedVars( kw, self._warning )
  del kw

  ### Parse arguments
  # Mutual exclusive arguments:
  if not getRates and getRatesOnly:
    self._logger.error("Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True.")
    getRates = True

  # Also parse operation, check if its type is string and if we can
  # transform it to the known operation enum:
  fList = csvStr2List ( fList )
  fList = expandFolders( fList )
  ringerOperation = RingerOperation.retrieve(ringerOperation)
  reference = Reference.retrieve(reference)
  if isinstance(l1EmClusCut, str):
    l1EmClusCut = float(l1EmClusCut)
  if l1EmClusCut:
    l1EmClusCut = 1000.*l1EmClusCut # Put energy in MeV
    __onlineBranches.append( 'trig_L1_emClus' )
  if l2EtCut:
    l2EtCut = 1000.*l2EtCut # Put energy in MeV
    __onlineBranches.append( 'trig_L2_calo_et' )
  if efEtCut:
    efEtCut = 1000.*efEtCut # Put energy in MeV
    __onlineBranches.append( 'trig_EF_calo_et' )
  if offEtCut:
    offEtCut = 1000.*offEtCut # Put energy in MeV
    __offlineBranches.append( 'el_et' )
  if not supportTriggers:
    __onlineBranches.append( 'trig_L1_accept' )

  # Check if treePath is None and try to set it automatically
  if treePath is None:
    treePath = 'Offline/Egamma/Ntuple/electron' if ringerOperation < 0 else \
               'Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH'

  # Check whether using bins
  useBins = False; useEtBins = False; useEtaBins = False
  nEtaBins = 1; nEtBins = 1

  # Set the detector which we should extract the information:
  if extractDet is None:
    if ringerOperation < 0:
      extractDet = Detector.Calorimetry
    elif ringerOperation is RingerOperation.L2Calo:
      extractDet = Detector.Calorimetry
    elif ringerOperation is RingerOperation.L2:
      extractDet = Detector.Tracking
    else:
      extractDet = Detector.CaloAndTrack
  else:
    extractDet = Detector.retrieve( extractDet )

  if etaBins is None: etaBins = npCurrent.fp_array([])
  if type(etaBins) is list: etaBins = npCurrent.fp_array(etaBins)
  if etBins is None: etBins = npCurrent.fp_array([])
  if type(etBins) is list: etBins = npCurrent.fp_array(etBins)

  if etBins.size:
    etBins = etBins * 1000. # Put energy in MeV
    nEtBins = len(etBins)-1
    if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
      self._fatal(('Number of et bins (%d) is larger or equal than maximum '
                   'integer precision can hold (%d). Increase '
                   'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'),
                  nEtBins, np.iinfo(npCurrent.scounter_dtype).max)
    # Flag that we are separating data through bins
    useBins = True
    useEtBins = True
    self._debug('E_T bins enabled.')

  if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
    # Replicate the scalar once per eta bin, or once when eta is not binned.
    # The conditional must select the repetition count: without the inner
    # parentheses the whole expression evaluated to the integer 1 when no eta
    # bins were given, and the len() below failed.
    ringConfig = [ringConfig] * ((len(etaBins) - 1) if etaBins.size else 1)
  if type(ringConfig) is list: ringConfig = npCurrent.int_array(ringConfig)
  if not len(ringConfig):
    self._fatal('Rings size must be specified.')

  if etaBins.size:
    nEtaBins = len(etaBins)-1
    if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
      self._fatal(('Number of eta bins (%d) is larger or equal than maximum '
                   'integer precision can hold (%d). Increase '
                   'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'),
                  nEtaBins, np.iinfo(npCurrent.scounter_dtype).max)
    if len(ringConfig) != nEtaBins:
      self._fatal(('The number of rings configurations (%r) must be equal than '
                   'eta bins (%r) region config'),ringConfig, etaBins)
    useBins = True
    useEtaBins = True
    self._debug('eta bins enabled.')
  else:
    self._debug('eta/et bins disabled.')

  ### Prepare to loop:
  # Open root file
  t = ROOT.TChain(treePath)
  for inputFile in progressbar(fList, len(fList),
                               logger = self._logger,
                               prefix = "Creating collection tree "):
    # Check if file exists
    f = ROOT.TFile.Open(inputFile, 'read')
    if not f or f.IsZombie():
      # The former 'Couldn''t' literal was two adjacent strings, so the
      # apostrophe was lost in the logged message.
      self._warning("Couldn't open file: %s", inputFile)
      continue
    # Inform user whether TTree exists, and which options are available:
    self._debug("Adding file: %s", inputFile)
    obj = f.Get(treePath)
    if not obj:
      self._warning("Couldn't retrieve TTree (%s)!", treePath)
      self._info("File available info:")
      f.ReadAll()
      f.ReadKeys()
      f.ls()
      continue
    elif not isinstance(obj, ROOT.TTree):
      self._fatal("%s is not an instance of TTree!", treePath, ValueError)
    t.Add( inputFile )

  # Turn all branches off.
  t.SetBranchStatus("*", False)

  # RingerPhysVal hold the address of required branches
  event = ROOT.RingerPhysVal()

  # Add offline branches, these are always needed
  cPos = 0
  for var in __offlineBranches:
    self.__setBranchAddress(t,var,event)

  # Add online branches if using Trigger
  if ringerOperation > 0:
    for var in __onlineBranches:
      self.__setBranchAddress(t,var,event)

  ## Allocating memory for the number of entries
  entries = t.GetEntries()
  nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                 else nClusters

  ## Retrieve the dependent operation variables:
  # The branch names are needed by the base information holder below even
  # when binning is disabled, so they are always defined.
  etBranch = 'el_et' if ringerOperation < 0 else 'trig_L2_calo_et'
  etaBranch = "el_eta" if ringerOperation < 0 else "trig_L2_calo_eta"
  if useEtBins:
    self.__setBranchAddress(t,etBranch,event)
    self._debug("Added branch: %s", etBranch)
    if not getRatesOnly:
      npEt = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
      self._debug("Allocated npEt with size %r", npEt.shape)

  if useEtaBins:
    self.__setBranchAddress(t,etaBranch,event)
    self._debug("Added branch: %s", etaBranch)
    if not getRatesOnly:
      npEta = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
      self._debug("Allocated npEta with size %r", npEta.shape)

  # The base information holder, such as et, eta and pile-up
  if pileupRef is NotSet:
    if ringerOperation > 0:
      pileupRef = PileupReference.avgmu
    else:
      pileupRef = PileupReference.nvtx

  pileupRef = PileupReference.retrieve( pileupRef )

  self._info("Using '%s' as pile-up reference.", PileupReference.tostring( pileupRef ) )

  if pileupRef is PileupReference.nvtx:
    pileupBranch = 'el_nPileupPrimaryVtx'
    pileupDataType = np.uint16
  elif pileupRef is PileupReference.avgmu:
    pileupBranch = 'avgmu'
    pileupDataType = np.float32
  else:
    raise NotImplementedError("Pile-up reference %r is not implemented." % pileupRef)

  # NOTE(review): branches are listed as (et, eta, pile-up, phi) while the
  # dtypes are (fp, fp, fp, pileupDataType) -- confirm the pairing that
  # BaseInfo expects.
  baseInfoBranch = BaseInfo((etBranch, etaBranch, pileupBranch, 'el_phi' if ringerOperation < 0 else 'trig_L2_el_phi',),
                            (npCurrent.fp_dtype, npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType) )
  baseInfo = [None, ] * baseInfoBranch.nInfo

  # Make sure all baseInfoBranch information is available:
  for idx in baseInfoBranch:
    self.__setBranchAddress(t,baseInfoBranch.retrieveBranch(idx),event)

  # Allocate numpy to hold as many entries as possible:
  if not getRatesOnly:
    # Retrieve the rings information depending on ringer operation
    ringerBranch = "el_ringsE" if ringerOperation < 0 else \
                   "trig_L2_calo_rings"
    self.__setBranchAddress(t,ringerBranch,event)
    if ringerOperation > 0:
      if ringerOperation is RingerOperation.L2:
        for var in __l2trackBranches:
          self.__setBranchAddress(t,var,event)
    if standardCaloVariables:
      if ringerOperation in (RingerOperation.L2, RingerOperation.L2Calo,):
        for var in __l2stdCaloBranches:
          self.__setBranchAddress(t, var, event)
      else:
        self._warning("Unknown standard calorimeters for Operation:%s. Setting operation back to use rings variables.",
                      RingerOperation.tostring(ringerOperation))
    t.GetEntry(0)
    # Number of pattern rows to allocate: calorimeter part plus track part.
    npat = 0
    if extractDet in (Detector.Calorimetry,
                      Detector.CaloAndTrack,
                      Detector.All):
      if standardCaloVariables:
        npat += 6
      else:
        npat += ringConfig.max()
    if extractDet in (Detector.Tracking,
                      Detector.CaloAndTrack,
                      Detector.All):
      if ringerOperation is RingerOperation.L2:
        if useTRT:
          self._info("Using TRT information!")
          npat += 2
          __l2trackBranches.append('trig_L2_el_nTRTHits')
          __l2trackBranches.append('trig_L2_el_nTRTHiThresholdHits')
        npat += 3
        for var in __l2trackBranches:
          self.__setBranchAddress(t,var,event)
        self.__setBranchAddress(t,"trig_L2_el_pt",event)
      elif ringerOperation < 0: # Offline
        self._warning("Still need to implement tracking for the ringer offline.")
    npPatterns = npCurrent.fp_zeros( shape=npCurrent.shape(npat=npat, #getattr(event, ringerBranch).size()
                                                           nobs=nobs)
                                   )
    self._debug("Allocated npPatterns with size %r", npPatterns.shape)

    # Add E_T, eta and luminosity information
    npBaseInfo = [npCurrent.zeros( shape=npCurrent.shape(npat=1, nobs=nobs ), dtype=baseInfoBranch.dtype(idx) )
                  for idx in baseInfoBranch]
  else:
    npPatterns = npCurrent.fp_array([])
    npBaseInfo = [deepcopy(npCurrent.fp_array([])) for _ in baseInfoBranch]

  ## Allocate the branch efficiency collectors:
  if getRates:
    if ringerOperation < 0:
      benchmarkDict = OrderedDict(
        [( RingerOperation.Offline_CutBased_Loose  , 'el_loose'    ),
         ( RingerOperation.Offline_CutBased_Medium , 'el_medium'   ),
         ( RingerOperation.Offline_CutBased_Tight  , 'el_tight'    ),
         ( RingerOperation.Offline_LH_Loose        , 'el_lhLoose'  ),
         ( RingerOperation.Offline_LH_Medium       , 'el_lhMedium' ),
         ( RingerOperation.Offline_LH_Tight        , 'el_lhTight'  ),
        ])
    else:
      benchmarkDict = OrderedDict(
        [( RingerOperation.L2Calo , 'trig_L2_calo_accept' ),
         ( RingerOperation.L2     , 'trig_L2_el_accept'   ),
         ( RingerOperation.EFCalo , 'trig_EF_calo_accept' ),
         ( RingerOperation.HLT    , 'trig_EF_el_accept'   ),
        ])

    from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
    branchEffCollectors = OrderedDict()
    branchCrossEffCollectors = OrderedDict()
    # NOTE(review): the per-bin sub-lists are only created when useBins is
    # set, yet they are indexed unconditionally below -- confirm the
    # non-binned path is exercised/supported.
    for key, val in benchmarkDict.iteritems():
      branchEffCollectors[key] = list()
      branchCrossEffCollectors[key] = list()
      # Add efficincy branch:
      if getRates or getRatesOnly:
        self.__setBranchAddress(t,val,event)
      for etBin in range(nEtBins):
        if useBins:
          branchEffCollectors[key].append(list())
          branchCrossEffCollectors[key].append(list())
        for etaBin in range(nEtaBins):
          etBinArg = etBin if useBins else -1
          etaBinArg = etaBin if useBins else -1
          argList = [ RingerOperation.tostring(key), val, etBinArg, etaBinArg ]
          branchEffCollectors[key][etBin].append(BranchEffCollector( *argList ) )
          if crossVal:
            branchCrossEffCollectors[key][etBin].append(BranchCrossEffCollector( entries, crossVal, *argList ) )
        # etBin
      # etaBin
    # benchmark dict
    if self._logger.isEnabledFor( LoggingLevel.DEBUG ):
      self._debug( 'Retrieved following branch efficiency collectors: %r',
          [collector[0].printName for collector in traverse(branchEffCollectors.values())])
  # end of (getRates)

  etaBin = 0; etBin = 0
  step = int(entries/100) if int(entries/100) > 0 else 1
  ## Start loop!
  self._info("There is available a total of %d entries.", entries)

  for entry in progressbar(range(entries), entries,
                           step = step, logger = self._logger,
                           prefix = "Looping over entries "):

    #self._verbose('Processing eventNumber: %d/%d', entry, entries)
    t.GetEntry(entry)

    # Check if it is needed to remove energy regions (this means that if not
    # within this range, it will be ignored as well for efficiency measuremnet)
    # The cuts are tested against None explicitly: an unset cut never
    # rejects an entry and no ordering of None against numbers is needed.
    if offEtCut is not None and event.el_et < offEtCut:
      self._verbose("Ignoring entry due to offline E_T cut.")
      continue

    # Add et distribution for all events
    if not monitoring is None:
      # Book all distribtions before the event selection
      self.__fillHistograms(monitoring,filterType,event,False)

    if ringerOperation > 0:
      # Remove events which didn't pass L1_calo
      if not supportTriggers and not event.trig_L1_accept:
        #self._verbose("Ignoring entry due to L1Calo cut (trig_L1_accept = %r).", event.trig_L1_accept)
        continue
      if l1EmClusCut is not None and event.trig_L1_emClus < l1EmClusCut:
        #self._verbose("Ignoring entry due to L1Calo E_T cut (%d < %r).", event.trig_L1_emClus, l1EmClusCut)
        continue
      if l2EtCut is not None and event.trig_L2_calo_et < l2EtCut:
        #self._verbose("Ignoring entry due to L2Calo E_T cut.")
        continue
      if efEtCut is not None and event.trig_L2_calo_accept:
        # EF calo is a container: the entry is dropped as soon as one of its
        # objects is below the cut.
        trig_EF_calo_et_list = stdvector_to_list(event.trig_EF_calo_et)
        if any(v < efEtCut for v in trig_EF_calo_et_list):
          #self._verbose("Ignoring entry due to EFCalo E_T cut.")
          continue

    # Set discriminator target:
    target = Target.Unknown
    if reference is Reference.Truth:
      if event.mc_isElectron and event.mc_hasZMother:
        target = Target.Signal
      elif not (event.mc_isElectron and (event.mc_hasZMother or event.mc_hasWMother) ):
        target = Target.Background
    elif reference is Reference.Off_Likelihood:
      if event.el_lhTight: target = Target.Signal
      elif not event.el_lhLoose: target = Target.Background
    elif reference is Reference.AcceptAll:
      target = Target.Signal if filterType is FilterType.Signal else Target.Background
    else:
      if event.el_tight: target = Target.Signal
      elif not event.el_loose: target = Target.Background

    # Run filter if it is defined
    if filterType and \
       ( (filterType is FilterType.Signal and target != Target.Signal) or \
         (filterType is FilterType.Background and target != Target.Background) or \
         (target == Target.Unknown) ):
      #self._verbose("Ignoring entry due to filter cut.")
      continue

    # Add et distribution for all events
    if not monitoring is None:
      # Book all distributions after the event selection
      self.__fillHistograms(monitoring,filterType,event,True)

    # Retrieve base information:
    for idx in baseInfoBranch:
      lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
      baseInfo[idx] = lInfo
      if not getRatesOnly: npBaseInfo[idx][cPos] = lInfo
    # Retrieve dependent operation region
    if useEtBins:
      etBin = self.__retrieveBinIdx( etBins, baseInfo[0] )
    if useEtaBins:
      etaBin = self.__retrieveBinIdx( etaBins, np.fabs( baseInfo[1]) )

    # Check if bin is within range (when not using bins, this will always be true):
    if (etBin < nEtBins and etaBin < nEtaBins):
      # Retrieve patterns:
      if not getRatesOnly:
        if useEtBins: npEt[cPos] = etBin
        if useEtaBins: npEta[cPos] = etaBin
        ## Retrieve calorimeter information:
        cPat = 0
        caloAvailable = True
        if extractDet in (Detector.Calorimetry,
                          Detector.CaloAndTrack,
                          Detector.All):
          if standardCaloVariables:
            patterns = []
            if ringerOperation is RingerOperation.L2Calo:
              from math import cosh
              cosh_eta = cosh( event.trig_L2_calo_eta )
              # second layer ratio between 3x7 7x7
              rEta = event.trig_L2_calo_e237 / event.trig_L2_calo_e277
              base = event.trig_L2_calo_emaxs1 + event.trig_L2_calo_e2tsts1
              # Ratio between first and second highest energy cells
              eRatio = ( event.trig_L2_calo_emaxs1 - event.trig_L2_calo_e2tsts1 ) / base if base > 0 else 0
              # ratio of energy in the first layer (hadronic particles should leave low energy)
              F1 = event.trig_L2_calo_fracs1 / ( event.trig_L2_calo_et * cosh_eta )
              # weta2 is calculated over the middle layer using 3 x 5
              weta2 = event.trig_L2_calo_weta2
              # wstot is calculated over the first layer using (typically) 20 strips
              wstot = event.trig_L2_calo_wstot
              # ratio between EM cluster and first hadronic layers:
              Rhad1 = ( event.trig_L2_calo_ehad1 / cosh_eta ) / event.trig_L2_calo_et
              # allocate patterns:
              patterns = [rEta, eRatio, F1, weta2, wstot, Rhad1]
              for pat in patterns:
                npPatterns[npCurrent.access( pidx=cPat, oidx=cPos) ] = pat
                cPat += 1
            # end of ringerOperation
          else:
            # Remove events without rings
            if getattr(event,ringerBranch).empty():
              caloAvailable = False
            # Retrieve rings:
            if caloAvailable:
              try:
                patterns = stdvector_to_list( getattr(event,ringerBranch) )
                lPat = len(patterns)
                if lPat == ringConfig[etaBin]:
                  npPatterns[npCurrent.access(pidx=slice(cPat,ringConfig[etaBin]),oidx=cPos)] = patterns
                else:
                  # The number of rings does not match this eta bin: try the
                  # neighbouring bins before giving up.
                  oldEtaBin = etaBin
                  if etaBin > 0 and ringConfig[etaBin - 1] == lPat:
                    etaBin -= 1
                  elif etaBin + 1 < len(ringConfig) and ringConfig[etaBin + 1] == lPat:
                    etaBin += 1
                  npPatterns[npCurrent.access(pidx=slice(cPat, ringConfig[etaBin]),oidx=cPos)] = patterns
                  self._warning(("Recovered event which should be within eta bin (%d: %r) "
                                 "but was found to be within eta bin (%d: %r). "
                                 "Its read eta value was of %f."),
                                oldEtaBin, etaBins[oldEtaBin:oldEtaBin+2],
                                etaBin, etaBins[etaBin:etaBin+2],
                                np.fabs( getattr(event,etaBranch)))
              except ValueError:
                self._logger.error(("Patterns size (%d) do not match expected "
                                    "value (%d). This event eta value is: %f, and ringConfig is %r."),
                                   lPat, ringConfig[etaBin], np.fabs( getattr(event,etaBranch)), ringConfig )
                continue
            else:
              # The entry is dropped whichever detector is being extracted
              # (the two former branches were identical).
              self._warning("Rings not available")
              continue
            cPat += ringConfig.max()
          # which calo variables
        # end of (extractDet needed calorimeter)
        # And track information:
        if extractDet in (Detector.Tracking,
                          Detector.CaloAndTrack,
                          Detector.All):
          if caloAvailable or extractDet is Detector.Tracking:
            if ringerOperation is RingerOperation.L2:
              # Retrieve nearest deta/dphi only, so we need to find each one is the nearest:
              if event.trig_L2_el_trkClusDeta.size():
                clusDeta = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDeta ) )
                clusDphi = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDphi ) )
                bestTrackPos = int( np.argmin( clusDeta**2 + clusDphi**2 ) )
                for var in __l2trackBranches:
                  npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = getattr(event, var)[bestTrackPos]
                  cPat += 1
              else:
                #self._verbose("Ignoring entry due to track information not available.")
                continue
                #for var in __l2trackBranches:
                #  npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = np.nan
                #  cPat += 1
            elif ringerOperation < 0: # Offline
              pass
          # caloAvailable or only tracking
        # end of (extractDet needs tracking)
      # end of (getRatesOnly)

      ## Retrieve rates information:
      if getRates:
        for branch in branchEffCollectors.itervalues():
          if not useBins:
            branch.update(event)
          else:
            branch[etBin][etaBin].update(event)
        if crossVal:
          for branchCross in branchCrossEffCollectors.itervalues():
            if not useBins:
              branchCross.update(event)
            else:
              branchCross[etBin][etaBin].update(event)
      # end of (getRates)

      # We only increment if this cluster will be computed
      cPos += 1
    # end of (et/eta bins)

    # Limit the number of entries to nClusters if desired and possible:
    if not nClusters is None and cPos >= nClusters:
      break
  # for end

  ## Treat the rings information
  if not getRatesOnly:

    ## Remove not filled reserved memory space:
    if npPatterns.shape[npCurrent.odim] > cPos:
      npPatterns = np.delete( npPatterns, slice(cPos,None), axis = npCurrent.odim)

    ## Segment data over bins regions:
    # Also remove not filled reserved memory space:
    if useEtBins:
      npEt = npCurrent.delete( npEt, slice(cPos,None))
    if useEtaBins:
      npEta = npCurrent.delete( npEta, slice(cPos,None))
    # Treat
    npObject = self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                npPatterns, )
    data = [self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                             nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                             npData)
            for npData in npBaseInfo]
    # The builtin `object` is what the `np.object` alias pointed to; the
    # alias itself no longer exists in recent NumPy releases.
    npBaseInfo = npCurrent.array( data, dtype=object )
  else:
    npObject = npCurrent.array([], dtype=npCurrent.dtype)
  # not getRatesOnly

  if getRates:
    if crossVal:
      for etBin in range(nEtBins):
        for etaBin in range(nEtaBins):
          for branchCross in branchCrossEffCollectors.itervalues():
            if not useBins:
              branchCross.finished()
            else:
              branchCross[etBin][etaBin].finished()

    # Print efficiency for each one for the efficiency branches analysed:
    for etBin in range(nEtBins) if useBins else range(1):
      for etaBin in range(nEtaBins) if useBins else range(1):
        for branch in branchEffCollectors.itervalues():
          lBranch = branch if not useBins else branch[etBin][etaBin]
          self._info('%s',lBranch)
        if crossVal:
          for branchCross in branchCrossEffCollectors.itervalues():
            lBranchCross = branchCross if not useBins else branchCross[etBin][etaBin]
            lBranchCross.dump(self._debug, printSort = True,
                              sortFcn = self._verbose)
        # for branch
      # for eta
    # for et
  # end of (getRates)

  outputs = []
  #if not getRatesOnly:
  outputs.extend((npObject, npBaseInfo))
  #if getRates:
  outputs.extend((branchEffCollectors, branchCrossEffCollectors))
  #outputs = tuple(outputs)
  return outputs
#'e24_vloose_L1EM20VH', #'e5_loose_idperf', #'e5_lhloose_idperf', #'e5_tight_idperf', #'e5_lhtight_idperf', 'e24_medium_idperf_L1EM20VH', 'e24_lhmedium_idperf_L1EM20VH' ] parser = argparse.ArgumentParser() parser.add_argument('--inFolderList', nargs='+', required=True, help = "Input container to retrieve data") parser.add_argument('--signalDS', action='store_true', help = "Whether the dataset contains TPNtuple") parser.add_argument('--outfile', action='store', default="mergedOutput.root", help = "Name of the output file") parser.add_argument('--triggerList', nargs='+', default=defaultTrigList, help = "Trigger list to keep on the filtered file.") args=parser.parse_args() mainLogger = Logger.getModuleLogger( __name__, LoggingLevel.INFO ) files = expandFolders( args.inFolderList ) rFile = RootFile( files, args.outfile ) rFile.dump('Offline/Egamma/Ntuple', ['electron']) rFile.dump('Trigger/HLT/Egamma/Ntuple', args.triggerList) if args.signalDS: rFile.dump('Trigger/HLT/Egamma/TPNtuple', args.triggerList) rFile.save()
# Operation point names to export for each entry of `crossval`, given in the
# same order as the paths held by that entry.
pidnames = [
  ['Medium', 'VeryLoose'],
  ['Medium'],
  ['Medium', 'VeryLoose'],
  ['Tight', 'Medium', 'Loose', 'VeryLoose'],
  ['Medium', 'VeryLoose'],
]

####################### Extract Ringer Configuration #########################

for idx, cv in enumerate(crossval):
  # Every tuning gets its own output folder under the working directory.
  tpath = os.getcwd() + '/' + tuningdirs[idx]
  mkdir_p(tpath)

  for jdx, pid in enumerate(pidnames[idx]):
    files = expandFolders(basepath + '/' + cv[jdx])
    # Only the compressed pickle files are handed to the exporter.
    crossValGrid = [path for path in files if path.endswith('.pic.gz')]

    d = CrossValidStatAnalysis.exportDiscrFilesToOnlineFormat(
        crossValGrid,
        refBenchCol=ref,
        discrFilename='%s/ElectronRinger%sConstants' % (tpath, pid),
        thresFilename='%s/ElectronRinger%sThresholds' % (tpath, pid),
        version=4,
    )
parser.add_argument('-d', '--data', action='store', dest='data', required=True,
                    nargs='+', help="The input tuning files.")

import sys, os
# Running the script without any argument only prints the usage.
if len(sys.argv) == 1:
  parser.print_help()
  sys.exit(1)

args = parser.parse_args()

# Take all files
paths = csvStr2List(args.data)
paths = expandFolders(paths)

from RingerCore import load, save, appendToFileName

# NOTE(review): the nesting of the print/save steps was reconstructed from a
# whitespace-collapsed source (assumed: once per input file) -- confirm.
for f in paths:
  ff = load(f)
  # The bin indexes are read from the entries whose key mentions 'SP'.
  for k in ff.keys():
    if 'SP' in k:
      etBin = ff[k]['etBinIdx']
      etaBin = ff[k]['etaBinIdx']
  # Single-argument form: prints the very same text as the former Python 2
  # print statement, and is also valid Python 3.
  print('etBin =  %s , etaBin =  %s' % (etBin, etaBin))
  # The output is named after the folder holding the file (second to last
  # path component), tagged with the bin indexes.
  fileparts = f.split('/')
  outname = fileparts[len(fileparts) - 2]
  cOutputName = appendToFileName(outname, ('et%d_eta%d') % (etBin, etaBin))
  save(ff, cOutputName, compress=True)
parser.print_help() sys.exit(1) # Retrieve parser args: args = parser.parse_args(namespace = LoggerNamespace() ) from RingerCore import Logger, LoggingLevel, printArgs logger = Logger.getModuleLogger( __name__, args.output_level ) printArgs( args, logger.debug ) #Find files from RingerCore import expandFolders, ensureExtension logger.info('Expand folders and filter') paths = expandFolders(args.file) paths = filterPaths(paths, args.grid) from pprint import pprint logger.info('Grid mode is: %s',args.grid) pprint(paths) from TuningTools import TuningDataArchieve try: logger.info(('Opening reference file with location: %s')%(args.refFile)) TDArchieve = TuningDataArchieve(args.refFile) with TDArchieve as data: patterns = data except:
parser.make_adjustments() emptyArgumentsPrintHelp(parser) # Retrieve parser args: args = parser.parse_args(namespace=LoggerNamespace()) from RingerCore import Logger, LoggingLevel, printArgs logger = Logger.getModuleLogger(__name__, args.output_level) printArgs(args, logger.debug) #Find files from RingerCore import expandFolders, ensureExtension, keyboard logger.info('Expand folders and filter') paths = expandFolders(args.file) print paths paths = filterPaths(paths, args.grid) from pprint import pprint logger.info('Grid mode is: %s', args.grid) pprint(paths) from TuningTools import MonitoringTool csummaryList = [] etBinMax = 0 etaBinMax = 0 #Loop over job grid, basically loop over user... for idx, jobID in enumerate(paths): logger.info(
metavar='INPUT', nargs='+', help="Files to change representation") emptyArgumentsPrintHelp(parser) args = parser.parse_args(namespace=LoggerNamespace()) from RingerCore import Logger, LoggingLevel, save, load, expandFolders, traverse import numpy as np from TuningTools import npCurrent, fixPPCol npCurrent.level = args.output_level logger = Logger.getModuleLogger(__name__, args.output_level) files = expandFolders(args.inputs) from zipfile import BadZipfile from copy import deepcopy for f in files: logger.info("Turning numpy matrix file '%s' into pre-processing file...", f) fileparts = f.split('/') folder = '/'.join(fileparts[0:-1]) + '/' fname = fileparts[-1] try: data = dict(load(f)) except BadZipfile, e: logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e)) continue logger.debug("Finished loading file '%s'...", f)
'ElectronHighEnergyVeryLooseConf', ] # Et Bins etBins = [15, 20, 30, 40, 50, 500000] # Eta bins etaBins = [0, 0.8, 1.37, 1.54, 2.5] # [Tight, Medium, Loose and VeryLoose] thrRelax = [-0.1, -0.1, 0, 0] ####################### Extract Ringer Configuration ######################### import numpy as np outputDict = dict() for idx, tuningName in enumerate(tuningNameList): files = expandFolders(basepath + '/' + pathList[idx]) crossValGrid = [] for path in files: if path.endswith('.pic'): crossValGrid.append(path) pprint(crossValGrid) pprint(configList[idx]) pprint(refBenchmarkList[idx]) c = CrossValidStatAnalysis.exportDiscrFiles( crossValGrid, RingerOperation.L2, triggerChains=tuningName, refBenchCol=refBenchmarkList[idx], EtBins=etBins, EtaBins=etaBins,
#etBins = [0, 30, 40, 50, 100000 ] #etaBins = [0, 0.8 , 1.37, 1.54, 2.5] etBins = [0, 30] etaBins = [0, 0.8] from TuningTools import CrossValidArchieve with CrossValidArchieve(crossValPath) as CVArchieve: crossVal = CVArchieve del CVArchieve from TuningTools import createData from TuningTools import Reference, RingerOperation from RingerCore import expandFolders createData( sgnFileList=expandFolders(basePath + '/' + sgnInputFile), bkgFileList=expandFolders(basePath + '/' + bkgInputFile), ringerOperation=RingerOperation.EFCalo, referenceSgn=Reference.Off_Likelihood, referenceBkg=Reference.Truth, treePath=treePath, output=outputFile, l1EmClusCut=20, l2EtCut=19, efEtCut=24, #offEtCut = 24, #nClusters = 50, #getRatesOnly = args.getRatesOnly, etBins=etBins, etaBins=etaBins, #ringConfig = args.ringConfig