def process(self, item):
    """Project one work item (a chain chunk) into a clone of the template histogram.

    ``item`` is a tuple-like entity carrying:
    - the chain to process
    - the first entry in the tree to process
    - the number of entries to process
    """
    import ROOT
    from ostap.logger.utils import logWarning
    with logWarning():
        import ostap.core.pyrouts
        import ostap.trees.trees

    the_chain   = item.chain
    entry_first = item.first
    entry_count = item.nevents

    ## Create the output histogram NB! (why here???)
    self.output = 0, self.histo.Clone()

    ## delegate to the regular (serial) projection
    from ostap.trees.trees import _tt_project_
    self.output = _tt_project_(the_chain, self.output[1], self.what,
                               self.cuts, '', entry_count, entry_first)
    del item
def process(self, jobid, item):
    """Process one task item: project its chain into a clone of the template histogram.

    ``item`` is a tuple-like entity carrying:
    - the name of the tree/chain
    - the list of files
    - the first entry in the tree to process
    - the number of entries to process

    Returns the (filtered, histogram) pair, also stored as ``self.__output``.
    """
    import ROOT
    from ostap.logger.utils import logWarning
    with logWarning():
        import ostap.core.pyrouts
        import ostap.trees.trees
        import ostap.histos.histos
        import ostap.frames.frames
        from ostap.trees.trees import Chain, Tree

    ## reconstruct the chain from the item description
    ## (FIX: local was named ``input'', shadowing the builtin)
    chain_ = Chain(name=item.name, files=item.files,
                   first=item.first, nevents=item.nevents)
    chain   = chain_.chain
    first   = chain_.first
    nevents = chain_.nevents

    ## the regular (serial) projection
    ## (FIX: this import was duplicated in the original body)
    from ostap.trees.trees import _tt_project_

    ## Create the output histogram NB! (why here???)
    ## clone it in the global ROOT directory so it is not file-resident
    from ostap.core.core import ROOTCWD
    with ROOTCWD():
        ROOT.gROOT.cd()
        histo = self.histo.Clone()
        self.__output = 0, histo

    self.__output = _tt_project_(tree=chain, histo=histo,
                                 what=self.what, cuts=self.cuts,
                                 options='', nentries=nevents,
                                 firstentry=first)
    del item
    return self.__output
def cproject(chain, histo, what, cuts):
    """Parallel projection of a long TChain into a histogram.
    >>> chain = ... ## large chain
    >>> histo = ... ## histogram template
    >>> cproject ( chain , histo , 'mass' , 'pt>10' )
    >>> chain.pproject ( histo , 'mass' , 'pt>0' ) ## ditto
    >>> chain.cproject ( histo , 'mass' , 'pt>0' ) ## ditto
    For 12-core machine, clear speedup factor of about 8 is achieved
    """
    #
    if not chain:
        return 0, histo
    if not histo:
        logger.error('cproject: invalid histogram')
        return 0, histo

    import ROOT
    histo.Reset()

    ## plain TTree (or anything that is not a TChain): do it serially
    if not isinstance(chain, ROOT.TChain):
        logger.warning(
            'cproject method is TChain-specific, skip parallelization')
        from ostap.trees.trees import _tt_project_
        return _tt_project_(chain, histo, what, cuts)

    if isinstance(cuts, ROOT.TCut):
        cuts = str(cuts)

    ## normalize ``what'' into a list of expressions
    ## if isinstance(what, str): what = what.split(',')
    if isinstance(what, str): what = what.split(';')
    if isinstance(what, str): what = [what]

    import ostap.trees.trees
    files = chain.files()
    cname = chain.GetName()

    ## one task parameter per (file, expression) pair
    params = [(fname, cname, str(expr), cuts)
              for fname in files for expr in what]

    task = ProjectTask(histo)
    wmgr = Parallel.WorkManager()
    wmgr.process(task, params)

    filtered = task.output[0]
    histo   += task.output[1]

    return filtered, histo
def process(self, params):
    """Project one (file, tree) chunk into a clone of the template histogram.

    ``params`` is assumed to be a tuple-like entity:
    - the file name
    - the tree name in the file
    - the variable/expression/expression list of quantities to project
    - the selection/weighting criteria
    - the first entry in tree to process
    - number of entries to process
    """
    import ROOT
    from ostap.logger.utils import logWarning
    with logWarning():
        import ostap.core.pyrouts

    ## allow a bare file name or a TChainElement as the parameter
    ## FIX: the original used the undefined name ``param'' here (NameError)
    ## NOTE(review): this 3-tuple has no tree name / expression slots —
    ## looks inconsistent with the unpacking below; confirm against callers
    if isinstance(params, str):
        params = (params, 0, n_large)
    elif isinstance(params, ROOT.TChainElement):
        params = (params.GetTitle(), 0, n_large)

    fname = params[0]  ## file name
    tname = params[1]  ## tree name
    what  = params[2]  ## variable/expression to project
    cuts  = params[3] if 3 < len(params) else ''  ## cuts
    first = params[4] if 4 < len(params) else 0   ## the first event
    nentries = params[5] if 5 < len(params) else n_large  ## number of events

    if isinstance(fname, ROOT.TChainElement):
        fname = fname.GetTitle()

    chain = ROOT.TChain(tname)
    chain.Add(fname)

    ## Create the output histogram NB! (why here???)
    self.output = 0, self.histo.Clone()

    ## use the regular projection
    from ostap.trees.trees import _tt_project_
    self.output = _tt_project_(chain, self.output[1], what, cuts,
                               '', nentries, first)
    del chain
def tproject(
        tree,                ## the tree
        histo,               ## histogram
        what,                ## variable/expression/list to be projected
        cuts='',             ## selection/weighting criteria
        nentries=-1,         ## number of entries
        first=0,             ## the first entry
        maxentries=1000000): ## chunk size
    """Parallel projection of a long TTree into a histogram.
    >>> tree  = ... ## large tree
    >>> histo = ... ## histogram template
    >>> tproject ( tree , histo , 'mass' , 'pt>10' )
    >>> tree.pproject ( histo , 'mass' , 'pt>10' ) ## ditto
    - significant gain can be achieved for very large ttrees with
      complicated expressions and cuts
    - maxentries parameter should be rather large
    Arguments:
    - tree       the tree
    - histo      the histogram
    - what       variable/expression/varlist to be projected
    - cuts       selection/weighting criteria
    - nentries   number of entries to process (<0: all entries in the tree)
    - first      the first entry to process
    - maxentries chunk size for parallel processing
    Returns the (filtered, histogram) pair.
    """
    if not tree:
        return 0, histo
    if not histo:
        logger.error('tproject: invalid histogram')
        return 0, histo

    import ROOT
    histo.Reset()

    num = len(tree)
    if num <= first:
        return 0, histo

    if 0 > nentries: nentries = n_large
    ## FIX: ``long'' is Python-2 only; ``int'' works for both
    maxentries = int(maxentries)
    if 0 >= maxentries: maxentries = n_large
    if 0 > first: first = 0

    ## total number of events to process
    ## FIX: compute it up front — it was used in the serial fall-back
    ## branches below before being defined (NameError)
    total = min(num - first, nentries)

    ## use the regular projection
    from ostap.trees.trees import _tt_project_

    fname = None
    tname = None

    if isinstance(tree, ROOT.TChain):
        if 1 == len(tree.files()):
            fname = tree.files()[0]
            tname = tree.GetName()
        else:
            logger.warning(
                '``tproject method is TTree-specific, skip parallelization')
            return _tt_project_(tree, histo, what, cuts, '', nentries, first)
    else:
        tdir  = tree.GetDirectory()
        ftree = tdir.GetFile()
        if not ftree:
            logger.debug('TTree is not file resident, skip parallelization')
            return _tt_project_(tree, histo, what, cuts, '', total, first)
        fname = ftree.GetName()
        tpath = tdir.GetPath()
        pr, d, path = tpath.rpartition(':')
        tname = path + '/' + tree.GetName()

    if not fname:
        logger.info("Can't determine fname, skip parallelization")
        return _tt_project_(tree, histo, what, cuts, '', total, first)
    if not tname:
        logger.info("Can't determine tname, skip parallelization")
        return _tt_project_(tree, histo, what, cuts, '', total, first)
    #
    if isinstance(cuts, ROOT.TCut): cuts = str(cuts)
    if isinstance(what, ROOT.TCut): what = str(what)
    ## if isinstance(what, str): what = what.split(',')
    if isinstance(what, str): what = what.split(',')
    if isinstance(what, str): what = what.split(';')
    if isinstance(what, str): what = [what]

    ## nothing to project
    if not what:
        return 0, histo

    ## the event range is rather short, no real need in parallel processing
    if total * len(what) < maxentries and len(what) < 4:
        return _tt_project_(tree, histo, what, cuts, '', total, first)

    ## split [first, first + total) into chunks of at most ``maxentries''
    ## FIX: the previous divmod-based split could divide by zero when the
    ## chunk count was 0, and the remainder chunk used an offset one chunk
    ## too far (events were silently skipped)
    chunks = []
    begin, left = first, total
    while 0 < left:
        size = min(maxentries, left)
        chunks.append((begin, size))
        begin += size
        left  -= size

    ## final list of parameters:
    ## [ (file_name, tree_name, what, cuts, first_event, num_events), ... ]
    params = [(fname, tname, str(w), cuts, cfirst, cnum)
              for (cfirst, cnum) in chunks for w in what]

    task = ProjectTask(histo)
    wmgr = Parallel.WorkManager()
    wmgr.process(task, params)

    filtered = task.output[0]
    histo   += task.output[1]

    return filtered, histo