def add_new_branch(chain, branch_name, function, verbose=True):
    """Add a new branch to a long chain in parallel - see ROOT.TTree.add_new_branch
    >>> chain = ...
    >>> chain.padd_new_branch ( 'new_branch' , 'px*py' )
    """
    from ostap.trees.trees import Chain
    from ostap.trees.trees import add_new_branch as _add_branch_

    if isinstance(chain, ROOT.TChain) and 1 < len(chain.files()):
        pass  ## a genuine multi-file chain: process it in parallel below
    elif isinstance(chain, ROOT.TTree):
        ## a single tree: no need for parallel processing
        return _add_branch_(chain, branch_name, function, verbose=False)

    ch    = Chain(chain)
    task  = AddBranch(branch_name, function)
    wmgr  = WorkManager(silent=not verbose)
    trees = ch.split(max_files=1)

    wmgr.process(task, trees)

    ## rebuild the chain from the (updated) files
    nc = ROOT.TChain(chain.name)
    for f in ch.files:
        nc.Add(f)

    return nc
def cproject(chain, histo, what, cuts,
             nentries=-1, first=0,
             chunk_size=1000000, silent=False):
    """Make a projection of the loooong chain into a histogram
    >>> chain = ...   ## large chain
    >>> histo = ...   ## histogram template
    >>> cproject       ( chain , histo , 'mass' , 'pt>10' )
    >>> chain.pproject ( histo , 'mass' , 'pt>0' )  ## ditto
    >>> chain.cproject ( histo , 'mass' , 'pt>0' )  ## ditto
    For a 12-core machine, a clear speedup factor of about 8 is achieved
    """
    #
    from ostap.trees.trees import Chain

    ch   = Chain(chain, first=first, nevents=nentries)

    task = ProjectTask(histo, what, cuts)
    wmgr = Parallel.WorkManager(silent=silent)
    wmgr.process(task, ch.split(chunk_size=chunk_size))

    filtered = task.output[0]
    histo   += task.output[1]

    return filtered, histo
def add_new_branch ( chain , branch_name , function , verbose = True ) :
    """Add a new branch to a long chain in parallel - see ROOT.TTree.add_new_branch
    >>> chain = ...
    >>> chain.padd_new_branch ( 'new_branch' , 'px*py' )
    """
    from ostap.trees.trees import Chain
    from ostap.trees.trees import add_new_branch as _add_branch_

    if   isinstance ( chain , ROOT.TChain ) and 1 < len ( chain.files () ) : pass
    elif isinstance ( chain , ROOT.TTree  ) :
        ## a single tree: no need for parallel processing
        return _add_branch_ ( chain , branch_name , function , verbose = False )

    ch       = Chain ( chain )
    branches = set   ( chain.branches () )

    task  = AddBranch   ( branch_name , function )
    wmgr  = WorkManager ( silent = not verbose   )
    trees = ch.split    ( max_files = 1          )

    wmgr.process ( task , trees )

    ## rebuild the chain from the (updated) files
    nc = ROOT.TChain ( chain.name )
    for f in ch.files : nc.Add ( f )

    ## report the newly added branches
    nb = list ( set ( nc.branches () ) - branches )
    if nb : logger.info ( 'Added branches:\n%s' % nc.table ( variables = nb , prefix = '# ' ) )

    return nc
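## A minimal usage sketch (illustration only, not part of the module API).
## The file pattern 'data_*.root', the tree name 'Events' and the branches
## 'px'/'py' are hypothetical placeholders.
def _example_add_new_branch_():
    import ROOT
    chain = ROOT.TChain('Events')
    chain.Add('data_*.root')                       ## hypothetical input files
    ## add the product px*py as a new branch, one parallel job per file;
    ## the files are updated in place and a fresh TChain is returned
    new_chain = add_new_branch(chain, 'pxy', 'px*py', verbose=True)
    return new_chain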
def cproject ( chain               ,
               histo               ,
               what                ,
               cuts                ,
               nentries   = -1     ,
               first      =  0     ,
               chunk_size = -1     ,
               max_files  =  5     ,
               silent     = False  , **kwargs ) :
    """Make a projection of the loooong chain into a histogram
    >>> chain = ...   ## large chain
    >>> histo = ...   ## histogram template
    >>> cproject       ( chain , histo , 'mass' , 'pt>10' )
    >>> chain.pproject ( histo , 'mass' , 'pt>0' )  ## ditto
    >>> chain.cproject ( histo , 'mass' , 'pt>0' )  ## ditto
    For a 12-core machine, a clear speedup factor of about 8 is achieved
    """
    #
    from ostap.trees.trees import Chain

    ch   = Chain ( chain , first = first , nevents = nentries )

    task = ProjectTask ( histo , what , cuts )
    wmgr = WorkManager ( silent = silent , **kwargs )
    wmgr.process ( task , ch.split ( chunk_size = chunk_size , max_files = max_files ) )

    ## unpack results
    _f , _h  = task.results ()
    filtered = _f
    histo   += _h
    del _h

    return filtered , histo
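## A minimal usage sketch (illustration only): the input files, tree name and
## branch names below are hypothetical placeholders.
def _example_cproject_():
    import ROOT
    chain = ROOT.TChain('Events')
    chain.Add('data_*.root')                       ## hypothetical input files
    histo = ROOT.TH1D('h_mass', 'mass distribution', 100, 0., 10.)
    ## fill the histogram with 'mass' for events passing 'pt>10', in parallel
    filtered, histo = cproject(chain, histo, 'mass', 'pt>10')
    return histo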
def addChoppingResponse( chain,                       ## input dataset to be updated
                         chopper,                     ## chopping category/formula
                         N,                           ## number of categories
                         inputs,                      ## input variables
                         weights_files,               ## files with TMVA weights (tar/gz or xml)
                         category_name='chopping',    ## category name
                         prefix='tmva_',              ## prefix for TMVA-variable
                         suffix='_response',          ## suffix for TMVA-variable
                         options='',                  ## TMVA-reader options
                         verbose=True,                ## verbosity flag
                         aux=0.9):
    """ Helper function to add TMVA/chopping response into dataset
    >>> tar_file = trainer.tar_file
    >>> chain    = ...
    >>> inputs   = [ 'var1' , 'var2' , 'var3' ]  ## input variables to TMVA
    >>> addChoppingResponse ( chain , chopper , N , inputs , tar_file , prefix = 'tmva_' )
    """
    from ostap.tools.chopping import addChoppingResponse as _add_response_

    if isinstance(chain, ROOT.TChain) and 1 < len(chain.files()):
        pass
    else:
        ## a single file/tree: process it sequentially
        return _add_response_(dataset=chain, chopper=chopper, N=N,
                              inputs=inputs, weights_files=weights_files,
                              prefix=prefix, suffix=suffix,
                              options=options, verbose=verbose, aux=aux)

    from ostap.trees.trees import Chain

    ch    = Chain(chain)
    task  = AddChopping(chopper=chopper, N=N, inputs=inputs,
                        weights_files=weights_files,
                        prefix=prefix, suffix=suffix,
                        options=options, verbose=verbose, aux=aux)
    wmgr  = WorkManager(silent=False)
    trees = ch.split(max_files=1)

    wmgr.process(task, trees)

    ## rebuild the chain from the (updated) files
    nc = ROOT.TChain(chain.name)
    for f in ch.files:
        nc.Add(f)

    return nc
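## A minimal usage sketch (illustration only): the input files, the chopping
## formula based on 'evt'/'run' branches, the variable names and the weights
## archive 'weights.tgz' are all hypothetical placeholders.
def _example_add_chopping_response_():
    import ROOT
    chain = ROOT.TChain('Events')
    chain.Add('data_*.root')                       ## hypothetical input files
    chain = addChoppingResponse(chain,
                                chopper       = '137*evt+813*run',  ## chopping category formula
                                N             = 11,                 ## number of categories
                                inputs        = ['var1', 'var2', 'var3'],
                                weights_files = 'weights.tgz',
                                prefix        = 'tmva_',
                                suffix        = '_response')
    return chain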
def addTMVAResponse( chain,                     ## input chain
                     inputs,                    ## input variables
                     weights_files,             ## files with TMVA weights (tar/gz or xml)
                     prefix='tmva_',            ## prefix for TMVA-variable
                     suffix='_response',        ## suffix for TMVA-variable
                     options='',                ## TMVA-reader options
                     verbose=True,              ## verbosity flag
                     aux=0.9,                   ## for Cuts method: efficiency cut-off
                     **kwargs):
    """ Helper function to add TMVA response into a long TChain
    >>> tar_file = trainer.tar_file
    >>> dataset  = ...
    >>> inputs   = [ 'var1' , 'var2' , 'var3' ]
    >>> dataset.addTMVAResponse ( inputs , tar_file , prefix = 'tmva_' )
    """
    from ostap.tools.tmva import addTMVAResponse as _add_response_

    if isinstance(chain, ROOT.TChain) and 1 < len(chain.files()):
        pass
    else:
        ## a single file/tree: process it sequentially
        return _add_response_(dataset=chain, inputs=inputs,
                              weights_files=weights_files,
                              prefix=prefix, suffix=suffix,
                              verbose=verbose, aux=aux)

    from ostap.trees.trees import Chain

    ch       = Chain(chain)
    branches = set(chain.branches())

    ## create the task
    task  = AddTMVA(inputs=inputs, weights_files=weights_files,
                    prefix=prefix, suffix=suffix,
                    verbose=verbose, aux=aux)
    wmgr  = WorkManager(silent=False, **kwargs)
    trees = ch.split(max_files=1)

    wmgr.process(task, trees)

    ## rebuild the chain from the (updated) files
    nc = ROOT.TChain(chain.name)
    for f in ch.files:
        nc.Add(f)

    ## report the newly added branches
    nb = list(set(nc.branches()) - branches)
    if nb:
        logger.info('Added branches:\n%s' % nc.table(variables=nb, prefix='# '))

    return nc
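## A minimal usage sketch (illustration only): the input files, the variable
## names and the weights archive 'weights.tgz' are hypothetical placeholders
## for a trained TMVA setup.
def _example_add_tmva_response_():
    import ROOT
    chain = ROOT.TChain('Events')
    chain.Add('data_*.root')                       ## hypothetical input files
    chain = addTMVAResponse(chain,
                            inputs        = ['var1', 'var2', 'var3'],
                            weights_files = 'weights.tgz',
                            prefix        = 'tmva_',
                            suffix        = '_response')
    return chain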
def pprocess( chain, selector, nevents=-1, first=0,
              shortcut=True,        ## important
              chunk_size=100000,    ## important
              max_files=5,
              ppservers=(),
              use_frame=20000,      ## important
              silent=False):
    """ Parallel processing of loooong chain/tree
    >>> chain    = ...
    >>> selector = ...
    >>> chain.pprocess ( selector )
    """
    from ostap.trees.trees import Chain

    ch        = Chain(chain)

    selection = selector.selection
    variables = selector.variables
    ## trivial = selector.trivial_vars and not selector.morecuts
    trivial   = selector.really_trivial and not selector.morecuts

    all = 0 == first and (0 > nevents or len(chain) <= nevents)

    if all and trivial and 1 < len(ch.files):
        logger.info("Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    task  = FillTask(variables, selection, trivial, use_frame)
    wmgr  = WorkManager(ppservers=ppservers, silent=silent)
    trees = ch.split(chunk_size=chunk_size, max_files=max_files)

    wmgr.process(task, trees)
    del trees

    dataset, stat = task.results()

    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat.skipped
    skipped = '/' + attention(skipped) if stat.skipped else ''
    logger.info(
        'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n# %s' %
        (selector.name, stat.processed, stat.total, skipped, selector.cuts(), dataset))

    return 1
def pStatVar(chain, what, cuts='',
             nevents=-1, first=0,
             chunk_size=250000, max_files=1,
             silent=True, **kwargs):
    """ Parallel processing of loooong chain/tree
    >>> chain = ...
    >>> chain.pstatVar( 'mass' , 'pt>1')
    """
    ## few special/trivial cases: process them sequentially
    last = min(n_large, first + nevents if 0 < nevents else n_large)

    if 0 <= first and 0 < nevents < chunk_size:
        return chain.statVar(what, cuts, first, last)
    elif isinstance(chain, ROOT.TChain):
        if 1 == chain.nFiles() and len(chain) < chunk_size:
            return chain.statVar(what, cuts, first, last)
    elif isinstance(chain, ROOT.TTree) and len(chain) < chunk_size:
        return chain.statVar(what, cuts, first, last)

    from ostap.trees.trees import Chain

    ch    = Chain(chain, first=first, nevents=nevents)
    task  = StatVarTask(what, cuts)
    wmgr  = WorkManager(silent=silent, **kwargs)
    trees = ch.split(chunk_size=chunk_size, max_files=max_files)

    wmgr.process(task, trees)

    del trees
    del ch

    results = task.results()
    return results
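## A minimal usage sketch (illustration only): the input files, tree name and
## branches 'mass'/'pt' are hypothetical. For small inputs the call falls back
## to a sequential chain.statVar; otherwise the chain is split and processed
## in parallel.
def _example_pstatvar_():
    import ROOT
    chain = ROOT.TChain('Events')
    chain.Add('data_*.root')                       ## hypothetical input files
    stats = pStatVar(chain, 'mass', 'pt>1')        ## statistics for 'mass' with the cut 'pt>1'
    print(stats)
    return stats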
def _pprocess_(chain, selector, nevents=-1, first=0,
               shortcut=True, chunk_size=100000,
               ppservers=(), silent=False):
    """ Parallel processing of loooong chain/tree
    >>> chain    = ...
    >>> selector = ...
    >>> chain.pprocess ( selector )
    """
    from ostap.trees.trees import Chain

    ch        = Chain(chain)

    selection = selector.selection
    variables = selector.variables
    trivial   = selector.trivial

    all = 0 == first and (0 > nevents or len(chain) <= nevents)

    if all and trivial and 1 < len(ch.files):
        logger.info("Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    task = FillTask(variables, selection, trivial)
    wmgr = Parallel.WorkManager(ppservers=ppservers, silent=silent)
    wmgr.process(task, ch.split(chunk_size=chunk_size))

    dataset, stat = task.output

    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat[2]
    skipped = '/' + attention(skipped) if stat[2] else ''
    logger.info(
        'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n# %s' %
        (selector.name, stat[1], stat[0], skipped, selector.cuts(), dataset))

    return 1
def pStatVar(chain, what, cuts='',
             nevents=-1, first=0,
             chunk_size=100000, max_files=10,
             ppservers=(), silent=True):
    """ Parallel processing of loooong chain/tree
    >>> chain = ...
    >>> chain.pstatVar( 'mass' , 'pt>1')
    """
    ## few special/trivial cases: process them sequentially
    last = min(n_large, first + nevents if 0 < nevents else n_large)

    if 0 <= first and 0 < nevents < chunk_size:
        return chain.statVar(what, cuts, first, last)
    elif isinstance(chain, ROOT.TChain):
        if chain.nFiles() < 5 and len(chain) < chunk_size:
            return chain.statVar(what, cuts, first, last)
    elif isinstance(chain, ROOT.TTree) and len(chain) < chunk_size:
        return chain.statVar(what, cuts, first, last)

    from ostap.trees.trees import Chain

    ch    = Chain(chain, first=first, nevents=nevents)
    task  = StatVarTask(what, cuts)
    wmgr  = WorkManager(ppservers=ppservers, silent=silent)
    trees = ch.split(chunk_size=chunk_size, max_files=max_files)

    wmgr.process(task, trees)

    del trees
    del ch

    results = task.results()
    return results
def parallel_fill ( chain                   ,
                    selector                ,
                    nevents    = -1         ,
                    first      = 0          ,
                    shortcut   = True       ,   ## important
                    chunk_size = 1000000    ,   ## important
                    max_files  = 5          ,
                    use_frame  = 20000      ,   ## important
                    silent     = False      ,
                    job_chunk  = -1         , **kwargs ) :
    """ Parallel processing of loooong chain/tree
    >>> chain    = ...
    >>> selector = ...
    >>> chain.pprocess ( selector )
    """
    import ostap.fitting.roofit
    from   ostap.fitting.pyselectors import SelectorWithVars
    from   ostap.trees.trees         import Chain

    assert isinstance ( selector , SelectorWithVars ) , \
           "Invalid type of ``selector'': %s" % type ( selector )

    ch        = Chain ( chain )

    selection = selector.selection
    variables = selector.variables
    roo_cuts  = selector.roo_cuts

    ## trivial = selector.trivial_vars and not selector.morecuts
    trivial   = selector.really_trivial and not selector.morecuts

    all = 0 == first and ( 0 > nevents or len ( chain ) <= nevents )

    if all and trivial and 1 < len ( ch.files ) :
        logger.info ( "Configuration is ``trivial'': redefine ``chunk-size'' to -1" )
        chunk_size = -1

    task  = FillTask ( variables = variables ,
                       selection = selection ,
                       roo_cuts  = roo_cuts  ,
                       trivial   = trivial   ,
                       use_frame = use_frame )
    wmgr  = WorkManager ( silent = silent , **kwargs )
    trees = ch.split ( chunk_size = chunk_size , max_files = max_files )

    wmgr.process ( task , trees , chunk_size = job_chunk )
    del trees

    dataset , stat = task.results ()

    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat.skipped
    skipped = '/' + attention ( skipped ) if stat.skipped else ''
    logger.info ( 'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n%s' % (
        selector.name                   ,
        stat.processed                  ,
        stat.total                      ,
        skipped                         ,
        selector.cuts ()                ,
        dataset.table ( prefix = '# ' ) ) )

    return dataset , stat
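## A minimal usage sketch (illustration only): the input files, the variable
## definition and the selection are hypothetical, and the tuple form
## (name, description, min, max) for a variable description is assumed here.
def _example_parallel_fill_():
    import ROOT
    from ostap.fitting.pyselectors import SelectorWithVars
    chain = ROOT.TChain('Events')
    chain.Add('data_*.root')                       ## hypothetical input files
    selector = SelectorWithVars(
        variables = [('mass', 'mass of the candidate', 0., 10.)],  ## assumed tuple form
        selection = 'pt>10')                       ## only events passing this cut
    ## fill a RooDataSet from the chain in parallel chunks
    dataset, stat = parallel_fill(chain, selector, chunk_size=500000)
    return dataset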
def reduce(chain,
           selection={},
           save_vars=(),
           new_vars={},
           no_vars=(),
           output='',
           name='',
           addselvars=False,
           silent=False, **kwargs):
    """ Parallel reduction of a loooong chain/tree into a smaller tree
    >>> chain   = ...
    >>> reduced = reduce ( chain , ... )
    """
    from ostap.trees.trees import Chain
    from ostap.frames.tree_reduce import ReduceTree

    if isinstance(chain, ROOT.TChain) and 1 >= len(chain.files()):
        ## a single file: no need for parallel processing
        return chain.reduce(selection=selection, save_vars=save_vars,
                            new_vars=new_vars, no_vars=no_vars,
                            output=output, name=name,
                            addselvars=addselvars, silent=silent)

    nb0 = len(chain.branches())
    ne0 = len(chain)

    ch    = Chain(chain)
    task  = ReduceTask(selection=selection, save_vars=save_vars,
                       new_vars=new_vars, addselvars=addselvars, name=name)
    wmgr  = WorkManager(silent=silent, **kwargs)
    trees = ch.split(max_files=1)

    wmgr.process(task, trees)

    result, table = task.results()
    for i in result.files:
        result.trash.add(i)

    if output:
        ## merge results into a single output file
        reduced = ReduceTree(result.chain,
                             selection='', save_vars=(),
                             addselvars=False, silent=True,
                             output=output, name=name)
        result = Chain(reduced.chain)

    if not silent:
        from ostap.frames.frames import report_print_table
        title = 'Tree -> Frame -> Tree filter/transformation'
        logger.info('Reduce tree:\n%s' % report_print_table(table, title, '# '))

        nb = len(result.chain.branches())
        ne = len(result.chain)
        f  = float(nb0 * ne0) / (nb * ne)
        logger.info('reduce: (%dx%d) -> (%dx%d) %.1f (branches x entries) '
                    % (nb0, ne0, nb, ne, f))

    return result
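## A minimal usage sketch (illustration only): the input files, the cut and
## the output name are hypothetical, and it is assumed here that a plain cut
## string is accepted as the selection.
def _example_reduce_():
    import ROOT
    chain = ROOT.TChain('Events')
    chain.Add('data_*.root')                       ## hypothetical input files
    ## keep only events passing 'pt>10' and merge the per-file results
    result = reduce(chain, selection='pt>10', output='reduced.root')
    return result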