def Process ( self, entry ):
    """Read one tree entry and, if it is accepted, append it to the dataset.

    Returns 0 when the entry cannot be read, when the python-level cut
    function rejects it, or when any variable is outside of its
    [min, max] range (the latter also increments ``self._skip``).
    Returns 1 after the variable set has been added to ``self.data``.
    """
    ## load the entry, give up if it cannot be read
    if self.GetEntry ( entry ) <= 0 :
        return 0

    silent = self._silence

    ## the progress bar is created lazily, on the first processed entry
    if not ( self._progress or silent ) :
        self._total = self.fChain.GetEntries()
        self._logger.info ( "Processing TChain('%s') #entries: %d" % ( self.fChain.GetName() , self._total ) )
        from Ostap.progress_bar import ProgressBar
        self._progress = ProgressBar ( max_value = self._total ,
                                       silent    = silent      )

    ## refresh the bar only every 1000-th event/entry
    if not silent and ( 0 == self._events % 1000 or 0 == entry % 1000 ) :
        self._progress.update_amount ( self.event () )

    self._events += 1

    tree = self.fChain

    ## python-level cuts
    if not self . _cuts ( tree ) :
        return 0

    ## evaluate the variables one by one; stop at the first one out of range
    for item in self._variables :
        var , low , high , accessor = item[0] , item[2] , item[3] , item[4]
        value = accessor ( tree )
        inside = low <= value <= high
        if not inside :
            self._skip += 1
            return 0
        var.setVal ( value )

    self.data.add ( self.varset )
    return 1
def __init__(self, files, description="", maxfiles=1000000, silent=False):
    """Expand the file patterns and pass every matching file to ``treatFile``.

    Args:
        files: a single pattern (str) or a sized iterable of patterns;
            each pattern is expanded with ``self.globPattern``.
        description: free text used in the log messages.
        maxfiles: the loop stops (with a warning) as soon as
            ``self.files`` holds this many entries.
        silent: suppress the info messages and the progress bar.
    """
    self.files = []
    self.patterns = files
    self.description = description
    self.maxfiles = maxfiles
    self.silent = silent

    if isinstance(files, str):
        files = [files]

    # A set removes duplicates produced by overlapping patterns.
    matched = set()
    for pattern in files:
        matched.update(self.globPattern(pattern))

    # Set iteration order is arbitrary: sort, so that the order of the
    # processed files (and the subset kept when ``maxfiles`` is hit) is
    # reproducible from run to run.
    matched = sorted(matched)

    if not self.silent:
        logger.info('Loading: %s #patterns/files: %s/%d' %
                    (self.description, len(files), len(matched)))

    from Ostap.progress_bar import ProgressBar
    with ProgressBar(max_value=len(matched), silent=self.silent) as bar:
        # kept as attribute: presumably advanced by treatFile -- TODO confirm
        self.progress = bar
        for name in matched:
            if len(self.files) >= self.maxfiles:
                logger.warning('Maxfiles limit is reached %s ' % self.maxfiles)
                break
            self.treatFile(name)

    if not self.silent:
        logger.info('Loaded: %s' % self)
def process(self, task, items, timeout=90000):
    """Run ``task`` over ``items`` with the worker pool and merge the results.

    The task is initialised locally, one job per item is scheduled with
    ``self.pool.uimap``, every (result, statistics) pair is merged as soon
    as it arrives, and finally ``task.finalize()`` is called.

    Args:
        task: the ``Task`` instance to be executed.
        items: sized sequence of work items, one job per item.
        timeout: accepted for interface compatibility; not used here.

    Raises:
        TypeError: if ``task`` is not a ``Task`` instance.
    """
    if not isinstance(task, Task):
        raise TypeError("task argument needs to be an 'Task' instance")

    # --- Call the local initialization
    task.initializeLocal()

    mode = self.mode
    if mode in ('cluster', 'multicore'):
        # --- Schedule all the jobs (identical for both modes)
        from Ostap.progress_bar import ProgressBar
        start = time.time()
        with ProgressBar(max_value=len(items), silent=self.silent) as bar:
            jobs = self.pool.uimap(_ppfunction,
                                   zip([task] * len(items), items))
            for result, stat in jobs:
                bar += 1
                task._mergeResults(result)
                self._mergeStatistics(stat)
        end = time.time()

        self._printStatistics()
        if mode == 'cluster':
            self.pp_stats()
        else:
            logger.info('Time elapsed since server creation %f' % (end - start))
    else:
        # Previously an unrecognised mode silently processed nothing.
        logger.warning("Unknown processing mode '%s': no jobs have been run" % mode)

    # --- Call the local finalize
    task.finalize()
def _iter_cuts_(self, cuts, first=0, last=_large, progress=False):
    """Iterator over ``good events'' in TTree/TChain:

    >>> tree = ... # get the tree
    >>> for i in tree.withCuts ( 'pt>5' ) : print ( i.y )

    Attention: TTree::GetEntry is already invoked for accepted events,
    no need in second call

    Args:
        cuts: the selection formula.
        first: index of the first entry to consider.
        last: upper limit on the entry index; clipped to ``len(self)``.
        progress: show the progress bar while iterating.

    Raises:
        TypeError: if the formula is reported as invalid by the iterator.
    """
    last = min(last, len(self))
    if last < first:
        return

    pit = cpp.Analysis.PyIterator(self, cuts, first, last)
    if not pit.ok():
        raise TypeError("Invalid Formula: %s" % cuts)

    # Empty range (e.g. an empty tree): nothing to yield. Returning here
    # also avoids the division by zero in the ``step`` computation below.
    if last == first:
        del pit
        return

    from Ostap.progress_bar import ProgressBar
    with ProgressBar(min_value=first, max_value=last, silent=not progress) as bar:

        step = 13.0 * max(bar.width, 101) / (last - first)

        _t = pit.tree()
        _o = _t
        while _t:

            yield _t
            _t = pit.next()  ## advance to the next entry

            if progress:
                current = pit.current() - 1  ## get the current entry index
                ## refresh the bar at the end, when the returned tree
                ## object changes, near both edges of the range, and
                ## otherwise only from time to time
                if not _t \
                       or _t != _o \
                       or current - first < 120 \
                       or last - current < 120 \
                       or 0 == current % 100000 \
                       or 0 == int(step * (current - first)) % 5:
                    bar.update_amount(current)
                    _o = _t

        if progress:
            bar.update_amount(last)

    del pit
def __init__(self, files, description="", maxfiles=1000000, silent=False):
    """Expand the file patterns and pass every matching file to ``treatFile``.

    Patterns containing ``/eos/lhcb/`` are treated as CERN EOS paths
    (experimental): with wildcards they are expanded through
    ``Ostap.EOS``, without wildcards they are taken verbatim.  All other
    patterns are expanded with ``glob``.

    Args:
        files: a single pattern (str) or a sized iterable of patterns.
        description: free text used in the log messages.
        maxfiles: the loop stops (with a warning) as soon as
            ``self.files`` holds this many entries.
        silent: suppress the info messages and the progress bar.
    """
    self.files = []
    self.patterns = files
    self.description = description
    self.maxfiles = maxfiles
    self.silent = silent

    if isinstance(files, str):
        files = [files]

    # A set removes duplicates produced by overlapping patterns.
    matched = set()
    for pattern in files:

        if '/eos/lhcb/' not in pattern:
            matched.update(glob.iglob(pattern))
            continue

        ## experimental feature: try to match files on CERN EOS
        if any(c in pattern for c in '*?[]'):
            logger.warning('Globbing might not work for EOS-files "%s"' % pattern)
            from Ostap.EOS import EOS
            with EOS() as eos:
                matched.update(eos.iglob(pattern, root=True))
        else:
            matched.add(pattern)

    # Set iteration order is arbitrary: sort, so that the order of the
    # processed files (and the subset kept when ``maxfiles`` is hit) is
    # reproducible from run to run.
    matched = sorted(matched)

    if not self.silent:
        logger.info('Loading: %s #patterns/files: %s/%d' %
                    (self.description, len(files), len(matched)))

    from Ostap.progress_bar import ProgressBar
    with ProgressBar(max_value=len(matched), silent=self.silent) as bar:
        # kept as attribute: presumably advanced by treatFile -- TODO confirm
        self.progress = bar
        for name in matched:
            if len(self.files) >= self.maxfiles:
                logger.warning('Maxfiles limit is reached %s ' % self.maxfiles)
                break
            self.treatFile(name)

    if not self.silent:
        logger.info('Loaded: %s' % self)
class SelectorWithVars(SelectorWithCuts) :
    """Create and fill the basic dataset for RooFit

    variables = [ ... ]

    ## add a variable 'my_name1' from the tree/chain
    variables += [
       #  name         descriptor         min-value , max-value
       ( 'my_name1' , 'my_description1' , low       , high      )
       ]

    ## get a variable 'my_name2' from the tree/chain with accessor function,
    ## e.g. rescale it on-flight
    variables += [
       #  name         descriptor         min-value , max-value , access function
       ( 'my_name2' , 'my_description2' , low       , high      , lambda s : s.my_name2/1000 )
       ]

    ## get less trivial expression
    variables += [
       ( 'my_name3' , 'my_description3' , low       , high      , lambda s : s.var1+s.var2 )
       ]

    ## any function that gets TChain/TTree and is evaluated to double,
    ## e.g. it could be a TMVA reader
    def myvar ( chain ) : ....
    variables += [
       ( 'my_name4' , 'my_description4' , low       , high      , myvar )
       ]

    ## add already booked variable, with accessor function
    v5 = ROOT.RooRealVar ( 'my_name5' , ... )
    variables += [ ( v5 , lambda s : s.var5 ) ]

    ## add already booked variable, the value is taken from 'my_name6';
    ## both the 1-tuple and the bare variable are accepted
    v6 = ROOT.RooRealVar ( 'my_name6' , ... )
    variables += [ ( v6 , ) ]
    variables += [   v6     ]

    ## finally create selector
    selector = SelectorWithVars (
         variables                             ,
         selection = ' chi2vx<30 && pt>2*GeV ' ,  ## filtering
       )
    chain = ...
    chain.process ( selector )
    dataset = selector.dataset ()
    """
    ## constructor
    def __init__ ( self                        ,
                   variables                   ,  ## list of variables
                   selection                   ,  ## Tree-selection
                   cuts      = lambda s : True ,
                   name      = ''              ,
                   fullname  = ''              ,
                   silence   = False           ) :
        """Book the variables and the (empty) dataset.

        variables : list of variable descriptions, see the class documentation
        selection : tree-selection, forwarded to SelectorWithCuts
        cuts      : python callable, applied to the tree for each entry
        name      : dataset name     (generated with dsID if empty)
        fullname  : dataset title    (built from module name and name if empty)
        silence   : suppress the progress bar and the final summary
        """
        if not name :
            from   Ostap.PyRoUts import dsID
            name = dsID()

        if not fullname : fullname = "%s/%s " % ( __name__ , name )

        #
        ## create the logger
        #
        from Ostap.Logger import getLogger
        self._logger = getLogger ( fullname )

        #
        ## instantiate the base class
        #
        SelectorWithCuts.__init__ ( self , selection ) ## initialize the base

        #
        ## keep the cuts
        #
        self._cuts      = cuts

        #
        ## variables
        #
        self.varset     = ROOT.RooArgSet()
        self._variables = []

        #
        ## add the variables one by one;
        ## a bare RooRealVar cannot be unpacked with '*', treat it explicitly
        #
        for v in variables :
            if isinstance ( v , ROOT.RooRealVar ) : self.addVariable (  v )
            else                                  : self.addVariable ( *v )

        #
        ## Book dataset
        #
        self.data = ROOT.RooDataSet (
            name        ,
            fullname    ,
            self.varset
            )

        #
        ## it is still very puzzling for me: should this line be here at all??
        ROOT.SetOwnership ( self.data , False )

        self._events   = 0        ## number of entries read so far
        self._progress = None     ## progress bar, created lazily in Process
        self._total    = 1        ## total number of entries in the chain
        self._skip     = 0        ## number of entries with out-of-range variable
        self._silence  = silence

    ## delete the selector, try to clear and delete the dataset
    def __del__    ( self  )  :
        """Clear, reset and drop the dataset (if it has been booked)"""
        if hasattr ( self , 'data' ) and self.data :
            self.data.Clear()
            self.data.reset()
            #
            del self.data

    ## get the dataset
    def dataset   ( self  ) :
        """ Get the data-set """
        return self.data

    ## refresh the progress bar (shared by all TSelector hooks below)
    def _update_progress ( self ) :
        """Refresh the progress bar, if it exists and output is not silenced"""
        if self._progress and not self._silence :
            self._progress.update_amount ( self.event () )

    ## the only one actually important method
    def Process ( self, entry ):
        """Fills data set

        Returns 0 if the entry cannot be read, is rejected by the cuts,
        or has a variable outside of its range; returns 1 otherwise.
        """
        #
        ## == getting the next entry from the tree
        #
        if self.GetEntry ( entry ) <=  0 : return 0             ## RETURN

        ## the progress bar is created on the first processed entry
        if not self._progress and not self._silence :
            self._total =  self.fChain.GetEntries()
            self._logger.info ( "Processing TChain('%s') #entries: %d" % ( self.fChain.GetName() , self._total ) )
            ## decoration:
            from Ostap.progress_bar import ProgressBar
            self._progress = ProgressBar ( max_value = self._total   ,
                                           silent    = self._silence )

        ## refresh the bar only every 1000-th event/entry
        if not self._silence :
            if 0 == self._events % 1000 or 0 == entry % 1000 :
                self._progress.update_amount ( self.event () )

        self._events += 1

        #
        ## == for more convenience
        #
        bamboo = self.fChain

        #
        ## apply cuts (if needed)
        #
        if not self . _cuts ( bamboo  ) : return 0

        #
        ## loop over all variables
        #
        for v in self._variables :

            var    =  v[0]  ## variable
            vmin   =  v[2]  ## min-value
            vmax   =  v[3]  ## max-value
            vfun   =  v[4]  ## accessor-function

            value  = vfun ( bamboo )
            if not vmin <= value <= vmax :     ## MUST BE IN RANGE!
                self._skip += 1
                return 0                       ## RETURN

            var.setVal ( value )

        self.data .add ( self.varset )

        return 1

    ## add declared variable to RooDataSet
    def addVariable ( self , var , *args ) :
        """Add declared variable to RooDataSet

        var  : either the variable name (then args are: description,
               min-value, max-value and, optionally, the accessor function)
               or an already booked RooRealVar (then args may hold the
               accessor function)

        Without the accessor function the value is taken from the tree
        attribute with the same name as the variable.

        Raises AttributeError for any other type of `var`
        """
        if   isinstance ( var , str ) : ## just the name of variable

            vname = var     ## name
            vdesc = args[0] ## description
            vmin  = args[1] ## min-value
            vmax  = args[2] ## max-value
            #
            ## accessor function
            #
            if 3 < len ( args ) : vfun = args[3]
            else                : vfun = lambda s : getattr( s , vname )
            #
            var = ROOT.RooRealVar ( vname , vdesc , vmin , vmax )

        elif isinstance ( var , ROOT.RooRealVar ) : # variable itself

            vname = var.GetName  () ## name
            vdesc = var.GetTitle () ## description
            vmin  = var.getMin   () ## min-value
            vmax  = var.getMax   () ## max-value
            #
            ## accessor function
            #
            if 0 < len ( args ) : vfun = args[0]
            else                : vfun = lambda s : getattr( s , vname )

        else :

            self._logger.error   ( 'Invalid variable description!' )
            raise AttributeError ( 'Invalid variable description!' )

        ## finally the entry
        self.varset.add  ( var )
        self._variables += [ ( var , vdesc , vmin , vmax , vfun ) ]

    #
    def Terminate ( self  ) :
        """Close the progress bar and report the summary"""
        if self._progress :
            self._progress.end()
        #
        if not self._silence :
            self._logger.info (
                'Events Processed/Total/Skept %d/%d/%d\nCUTS: "%s"' % (
                self._events ,
                self._total  ,
                self._skip   ,
                self.cuts () ) )
            self.data.Print('v')

        if not len ( self.data ) :
            self._logger.warning("Empty dataset!")
        ##
        if 0 != self.GetAbort() :
            self._logger.error('Process has been aborted!')

    #
    def Init    ( self, chain ) :
        """Refresh the progress bar and delegate to the base class"""
        self._update_progress ()
        #
        return SelectorWithCuts.Init ( self , chain )

    def Begin          ( self , tree = None ) :
        """Refresh the progress bar"""
        self._update_progress ()

    #
    def SlaveBegin     ( self , tree ) :
        """Refresh the progress bar"""
        self._update_progress ()

    #
    def Notify         ( self ) :
        """Refresh the progress bar"""
        self._update_progress ()

    def SlaveTerminate ( self ) :
        """Refresh the progress bar"""
        self._update_progress ()
def GetTuple(self, chain):
    """Loop over ``chain``, apply the event filters and fill an ntuple.

    The configured masks are printed first.  An entry is dropped when it
    fails ``IsInLuminosityRegion``, ``PassMuonCuts`` or is flagged by
    ``IsMuonsGhosts``; every accepted entry adds one row to the ntuple
    created by ``CreateTuple``.

    Args:
        chain: iterable tree/chain providing ``GetEntriesFast()``.

    Returns:
        The filled ntuple, or None when ``CheckChainBranch`` reports
        missing attributes.
    """
    print(
        " ----> The following general mask will be applyied to the chain : \n"
    )
    print('-- muon : {}'.format(
        self.filter_mask['daughter_mask'].split('**')))
    print('-- Jpsi : {}'.format(
        self.filter_mask['mother_mask'].split('**')))
    print('-- other : {}'.format(self.filter_mask['other'].split('**')))
    print("\n *** You may also want to check \n"
          " *** AnnaTupleFilterJpsiPbPbV2::IsInLuminosityRegion() \n"
          " *** and AnnaTupleFilterJpsiPbPbV2::IsMuonsGhosts() \n"
          " *** where other cuts are also defined \n")

    ntuple = self.CreateTuple()

    okBranch = self.CheckChainBranch(chain)
    if okBranch is False:
        error(' attributes are missing')
        return None

    # leaf-name prefixes, hoisted out of the event loop
    mother = self.mother_leaf
    mu_0 = self.dimuon_leafs[0]
    mu_1 = self.dimuon_leafs[1]

    # counters
    entry_number = 0
    entry_exlude = 0
    tot_entries = chain.GetEntriesFast()

    # passed to IsMuonsGhosts for every entry; presumably accumulates the
    # muons seen so far -- TODO confirm
    muon_all = list()

    print(' --- Start running over events ...')
    with ProgressBar(max_value=tot_entries, silent=False) as bar:
        for entry in chain:
            entry_number += 1
            bar.update_amount(entry_number)

            ok_lumi, v_OWNPV, v_ENDVERTEX = self.IsInLuminosityRegion(
                entry_number, entry)
            if ok_lumi is False:
                info("entry {} does not pass the luminosity cut".format(
                    entry_number))
                entry_exlude += 1
                continue

            ok_muon = self.PassMuonCuts(entry_number, entry)
            if ok_muon is False:
                info("entry {} do not pass muons cut".format(entry_number))
                entry_exlude += 1
                continue

            # Check muon ghost probability
            is_ghost = self.IsMuonsGhosts(entry_number, entry, muon_all)
            if is_ghost is True:
                info("entry {} most likely have ghosts".format(
                    entry_number))
                entry_exlude += 1
                continue

            # NOTE(review): this step is reported as the "mother cut" but
            # it calls PassMuonCuts a second time.  It looks like a
            # copy-paste slip; confirm which method was intended.
            ok_mother = self.PassMuonCuts(entry_number, entry)
            if ok_mother is False:
                info(
                    "entry {} do not pass mother cut".format(entry_number))
                entry_exlude += 1
                continue

            # Prepare Data
            # v_OWNPV is modified in place: it now holds OWNPV - ENDVERTEX
            v_OWNPV -= v_ENDVERTEX
            dZ = (getattr(entry, mother + '_ENDVERTEX_Z') -
                  getattr(entry, mother + '_OWNPV_Z')) * 1e-3
            tZ = dZ * 3096.916 / (
                getattr(entry, mother + '_PZ') * TMath.C())

            ntuple.Fill(getattr(entry, mother + '_MM'),
                        getattr(entry, mother + '_PT'),
                        getattr(entry, mother + '_Y'),
                        getattr(entry, mother + '_OWNPV_Z'),
                        v_OWNPV.Mag(),
                        dZ,
                        tZ,
                        getattr(entry, mu_0 + '_PIDmu'),
                        getattr(entry, mu_1 + '_PIDmu'),
                        getattr(entry, mu_0 + '_PIDK'),
                        getattr(entry, mu_1 + '_PIDK'),
                        getattr(entry, 'eHcal'),
                        getattr(entry, 'eEcal'),
                        getattr(entry, 'nVeloClusters'))

    # An empty chain must not crash the summary with a division by zero.
    if entry_number:
        removed = float(entry_exlude) / float(entry_number) * 100
    else:
        removed = 0.0
    print(
        ' --- Done ! Ran over {} events with {:.1f}% removed from cuts !'.
        format(entry_number, removed))

    return ntuple
def irun(nEvents, accept=lambda: True, postAction=None, preAction=None, EvtMax=-1, progress=False, running=False):
    """Generator: run over events until ``nEvents`` good events are found.

    >>> for event, found, processed in irun ( 100 , accept_function ) :
    ...     hdr = get('/Event/Rec/Header')
    ...     print ( hdr.runNumber() , hdr.eventNumber() )

    ``accept`` is called without arguments for every processed event; for
    each accepted one the triplet (event, #accepted, #processed) is
    yielded.  E.g. one can select events with at least 2 J/psi:

    >>> good_events = lambda : 2 <= len(get('/Event/PSIX/Phys/SelDetachedPsisForBandQ/Particles'))

    A negative ``nEvents`` removes the limit on accepted events; a positive
    ``EvtMax`` limits the number of processed events.  ``progress`` and
    ``running`` switch on the progress bar and the running bar.

    The command can be useful in scripts
    """
    ## (period, logger method): the first period dividing the number of
    ## processed events selects the level of the periodic report
    reports = ((1000000, 'fatal'),
               (100000, 'error'),
               (10000, 'warning'),
               (5000, 'info'),
               (1000, 'debug'),
               (500, 'verbose'))

    with Action(preAction, postAction):

        ## get application manager:
        _g = appMgr()

        sc = SUCCESS
        evnt = True
        iev = 0  ## number of accepted events
        nev = 0  ## total number of processed events

        from Ostap.progress_bar import ProgressBar, RunningBar

        with Action(preRun_actions(), postRun_actions()):
            with RunAction():

                show_running = running
                show_progress = progress and 1 < nEvents and not running
                pbar = ProgressBar(max_value=nEvents, silent=not show_progress)
                rbar = RunningBar(silent=not show_running)

                while sc.isSuccess() and evnt:

                    ## enough good events are found
                    if 0 <= nEvents <= iev:
                        break

                    sc = _next_event_(1)
                    if sc.isFailure():
                        break

                    evnt = get('/Event')
                    if not evnt:
                        if sc.isSuccess():
                            sc.setCode(2)
                        break

                    pbar += 1
                    rbar += 1
                    nev += 1

                    if 0 < EvtMax < nev:
                        break

                    for period, level in reports:
                        if 0 == nev % period:
                            getattr(logger, level)(
                                'irun: run over %d, found %s ' % (nev, iev))
                            break

                    ## GOOD EVENT?
                    if accept():
                        iev += 1
                        yield evnt, iev, nev
def makePlots ( the_func        ,
                particle        ,
                stripping       ,
                polarity        ,
                trackcuts       ,
                runMin   =  0   ,
                runMax   = -1   ,
                verbose  = True ,
                maxFiles = -1   ):
    """Loop over the calibration datasets and accumulate the plots.

    For every file index in the selected range the dataset is loaded with
    ``getDataSet``, handed to ``the_func`` together with the plots
    collected so far, and released again.

    Args:
        the_func:  callable ``(particle, dataset, plots, verbose) -> plots``.
        particle, stripping, polarity: sample identification, validated
            with the PIDPerfScripts check functions.
        trackcuts: selection string; run-number limits are appended to it
            only when it is non-empty.
        runMin, runMax: run range; ``runMax < runMin`` means no upper limit
            for the file selection.
        verbose:   verbose helpers and memory monitoring of each step.
        maxFiles:  forwarded to ``GetMinMaxFileDictionary``.

    Returns:
        The plots as returned by the last call of ``the_func``
        (an empty list if no dataset has been processed).
    """
    #**********************************************************************
    from PIDPerfScripts.DataFuncs import CheckStripVer, CheckMagPol, CheckPartType
    CheckStripVer ( stripping )
    CheckMagPol   ( polarity  )
    CheckPartType ( particle  )

    #======================================================================
    # Create dictionary holding:
    # - Reconstruction version    ['RecoVer']
    # - np.array of:
    #        - MagUp run limits   ['UpRuns']
    #        - MagDown run limits ['DownRuns']
    #======================================================================
    from PIDPerfScripts.DataFuncs import GetRunDictionary
    DataDict = GetRunDictionary ( stripping , particle , verbose = verbose )

    #======================================================================
    # Append runNumber limits to TrackCuts
    #======================================================================
    if trackcuts and 0 < runMin : trackcuts += ' && runNumber>=%d ' % runMin
    if trackcuts and 0 < runMax : trackcuts += ' && runNumber<=%d ' % runMax

    logger.debug ( 'Track Cuts: %s ' % trackcuts )

    #======================================================================
    # Determine min and max file indices
    #======================================================================
    if runMax < runMin : runMax = None
    from PIDPerfScripts.DataFuncs import GetMinMaxFileDictionary
    IndexDict = GetMinMaxFileDictionary ( DataDict , polarity ,
                                          runMin   , runMax   ,
                                          maxFiles , verbose  )

    #======================================================================
    # Declare default list of PID plots
    #======================================================================
    plots = []

    #======================================================================
    # Loop over all calibration subsamples
    #======================================================================
    mn = IndexDict['minIndex']
    mx = IndexDict['maxIndex']

    from Ostap.Utils import memory, NoContext

    logger.info ( 'Start the loop over %d datafiles %s %s %s ' % ( mx - mn + 1 ,
                                                                  particle    ,
                                                                  stripping   ,
                                                                  polarity    ) )

    from Ostap.progress_bar import ProgressBar
    with ProgressBar ( mn , mx + 1 , 100 , mode = 'fixed' ) as bar :

        ## 'range' (not the python2-only 'xrange') works for python 2 and 3
        for index in range ( mn , mx + 1 ) :

            bar += 1

            ## monitor the memory usage only in verbose mode
            manager = memory() if verbose else NoContext()
            with manager :

                dataset = getDataSet ( particle  ,
                                       stripping ,
                                       polarity  ,
                                       trackcuts ,
                                       index     ,
                                       verbose = verbose )

                if not dataset : continue

                plots = the_func ( particle ,
                                   dataset  ,
                                   plots    ,
                                   verbose  )

                ## release the dataset explicitly before the next one is loaded
                dataset.reset  ()
                dataset.store  ().reset ()
                dataset.store  ().Reset ()
                dataset.Delete ()
                if dataset : del dataset

    return plots