Ejemplo n.º 1
0
def add_new_branch(chain, branch_name, function, verbose=True):
    """Add a new branch to a long chain, processing its files in parallel.

    - see ROOT.TTree.add_new_branch
    - `chain`       : ROOT.TChain or ROOT.TTree to be extended
    - `branch_name` : name of the branch to add
    - `function`    : what to fill the branch with (forwarded unchanged)
    - `verbose`     : when false, the work manager is created silent

    A TChain with more than one file is split into one-file pieces that are
    processed by `AddBranch` through `WorkManager`. Any other TTree is
    delegated to the sequential `add_new_branch`, which is always called
    with `verbose=False` (the `verbose` argument is not forwarded there).

    Returns a new ROOT.TChain with the same name and files (parallel path)
    or the result of the sequential call.

    >>> chain = ....
    >>> chain.padd_new_branch ( 'new_branch' , 'px*py' )
    """
    from ostap.trees.trees import Chain
    from ostap.trees.trees import add_new_branch as _add_branch_

    multi_file = isinstance(chain, ROOT.TChain) and len(chain.files()) > 1
    if not multi_file and isinstance(chain, ROOT.TTree):
        ## nothing to parallelise: use the sequential implementation
        return _add_branch_(chain, branch_name, function, verbose=False)

    wrapped = Chain(chain)

    job = AddBranch(branch_name, function)
    manager = WorkManager(silent=not verbose)
    pieces = wrapped.split(max_files=1)

    manager.process(job, pieces)

    ## rebuild the chain from the same files
    updated = ROOT.TChain(chain.name)
    for file_name in wrapped.files:
        updated.Add(file_name)

    return updated
Ejemplo n.º 2
0
def cproject(chain,
             histo,
             what,
             cuts,
             nentries=-1,
             first=0,
             chunk_size=1000000,
             silent=False):
    """Project a long chain onto a histogram, chunk by chunk in parallel.

    - `chain`      : the chain/tree to project
    - `histo`      : histogram template; the task result is added to it in place
    - `what`       : variable/expression to project
    - `cuts`       : selection/weighting expression
    - `nentries`   : number of entries to use (passed to `Chain` as `nevents`)
    - `first`      : first entry to use
    - `chunk_size` : chunk size used to split the chain
    - `silent`     : forwarded to the work manager

    Returns the pair `(filtered, histo)`; both parts come from the output
    of the `ProjectTask`.

    >>> chain = ... ## large chain
    >>> histo = ... ## histogram template
    >>> cproject ( chain , histo , 'mass' , 'pt>10' )

    The original author reports a speedup factor of about 8 on a
    12-core machine.
    """
    from ostap.trees.trees import Chain

    source = Chain(chain, first=first, nevents=nentries)

    job = ProjectTask(histo, what, cuts)
    manager = Parallel.WorkManager(silent=silent)

    chunks = source.split(chunk_size=chunk_size)
    manager.process(job, chunks)

    ## first element of the task output is returned as-is,
    ## the second one is accumulated into the caller's histogram
    filtered = job.output[0]
    histo += job.output[1]

    return filtered, histo
Ejemplo n.º 3
0
def cproject(chain,
             histo,
             what,
             cuts,
             nentries=-1,
             first=0,
             chunk_size=-1,
             max_files=5,
             silent=False, **kwargs):
    """Project a long chain onto a histogram, chunk by chunk in parallel.

    - `chain`      : the chain/tree to project
    - `histo`      : histogram template; the task result is added to it in place
    - `what`       : variable/expression to project
    - `cuts`       : selection/weighting expression
    - `nentries`   : number of entries to use (passed to `Chain` as `nevents`)
    - `first`      : first entry to use
    - `chunk_size` : chunk size used to split the chain
    - `max_files`  : maximal number of files per chunk, used for the split
    - `silent`     : forwarded to `WorkManager`
    - `kwargs`     : extra keyword arguments forwarded to `WorkManager`

    Returns the pair `(filtered, histo)` built from `task.results()`.

    >>> chain = ... ## large chain
    >>> histo = ... ## histogram template
    >>> cproject ( chain , histo , 'mass' , 'pt>10' )

    The original author reports a speedup factor of about 8 on a
    12-core machine.
    """
    from ostap.trees.trees import Chain

    source = Chain(chain, first=first, nevents=nentries)

    job = ProjectTask(histo, what, cuts)
    manager = WorkManager(silent=silent, **kwargs)

    chunks = source.split(chunk_size=chunk_size, max_files=max_files)
    manager.process(job, chunks)

    ## unpack the merged results and accumulate into the caller's histogram
    filtered, partial = job.results()
    histo += partial
    del partial

    return filtered, histo
Ejemplo n.º 4
0
def add_new_branch(chain, branch_name, function, verbose=True):
    """Add a new branch to a long chain, processing its files in parallel.

    - see ROOT.TTree.add_new_branch
    - `chain`       : ROOT.TChain or ROOT.TTree to be extended
    - `branch_name` : name of the branch to add
    - `function`    : what to fill the branch with (forwarded unchanged)
    - `verbose`     : when false, the work manager is created silent

    A TChain with more than one file is split into one-file pieces that are
    processed by `AddBranch` through `WorkManager`. Any other TTree is
    delegated to the sequential `add_new_branch`, which is always called
    with `verbose=False` (the `verbose` argument is not forwarded there).

    On the parallel path the branches that appeared in the rebuilt chain
    are reported via `logger.info`.

    Returns a new ROOT.TChain with the same name and files (parallel path)
    or the result of the sequential call.

    >>> chain = ....
    >>> chain.padd_new_branch ( 'new_branch' , 'px*py' )
    """
    from ostap.trees.trees import Chain
    from ostap.trees.trees import add_new_branch as _add_branch_

    multi_file = isinstance(chain, ROOT.TChain) and len(chain.files()) > 1
    if not multi_file and isinstance(chain, ROOT.TTree):
        ## nothing to parallelise: use the sequential implementation
        return _add_branch_(chain, branch_name, function, verbose=False)

    wrapped = Chain(chain)
    ## remember the branches present before the processing
    known = set(chain.branches())

    job = AddBranch(branch_name, function)
    manager = WorkManager(silent=not verbose)
    pieces = wrapped.split(max_files=1)

    manager.process(job, pieces)

    ## rebuild the chain from the same files
    updated = ROOT.TChain(chain.name)
    for file_name in wrapped.files:
        updated.Add(file_name)

    added = list(set(updated.branches()) - known)
    if added:
        report = updated.table(variables=added, prefix='# ')
        logger.info('Added branches:\n%s' % report)

    return updated
Ejemplo n.º 5
0
def addChoppingResponse(
        chain,  ## input dataset to be updated
        chopper,  ## chopping category/formula
        N,  ## number of categories
        inputs,  ## input variables
        weights_files,  ## files with TMVA weights (tar/gz or xml)
        category_name='chopping',  ## category name (not used in this function)
        prefix='tmva_',  ## prefix for TMVA-variable
        suffix='_response',  ## suffix for TMVA-variable
        options='',  ## TMVA-reader options
        verbose=True,  ## verbosity flag
        aux=0.9):
    """Add the TMVA/chopping response to a dataset, in parallel for long chains.

    A TChain with more than one file is split into one-file pieces that are
    processed by `AddChopping` through `WorkManager`; anything else is passed
    to the sequential `ostap.tools.chopping.addChoppingResponse`.

    All settings except `category_name` are forwarded unchanged to the
    sequential function or to the task. The work manager is always created
    with `silent=False`, independently of `verbose`.

    Returns a new ROOT.TChain with the same name and files (parallel path)
    or the result of the sequential call.

    >>> tar_file = trainer.tar_file
    >>> chain    = ...
    >>> inputs   = [ 'var1' , 'var2' , 'var2' ] ## input variables to TMVA
    >>> addChoppingResponse ( chain , chopper , N , inputs , tar_file , prefix = 'tmva_' )
    """

    from ostap.tools.chopping import addChoppingResponse as _add_response_

    ## settings shared by the sequential call and the parallel task
    settings = dict(chopper=chopper,
                    N=N,
                    inputs=inputs,
                    weights_files=weights_files,
                    prefix=prefix,
                    suffix=suffix,
                    options=options,
                    verbose=verbose,
                    aux=aux)

    multi_file = isinstance(chain, ROOT.TChain) and len(chain.files()) > 1
    if not multi_file:
        return _add_response_(dataset=chain, **settings)

    from ostap.trees.trees import Chain
    wrapped = Chain(chain)

    job = AddChopping(**settings)

    manager = WorkManager(silent=False)
    pieces = wrapped.split(max_files=1)

    manager.process(job, pieces)

    ## rebuild the chain from the same files
    updated = ROOT.TChain(chain.name)
    for file_name in wrapped.files:
        updated.Add(file_name)

    return updated
Ejemplo n.º 6
0
def addTMVAResponse(
        chain,  ## input chain
        inputs,  ## input variables
        weights_files,  ## files with TMVA weights (tar/gz or xml)
        prefix='tmva_',  ## prefix for TMVA-variable
        suffix='_response',  ## suffix for TMVA-variable
        options='',  ## TMVA-reader options (accepted but not forwarded here)
        verbose=True,  ## verbosity flag
        aux=0.9,  ## presumably the efficiency cut-off for the Cuts method -- confirm
        **kwargs):  ## forwarded to WorkManager
    """Add the TMVA response to a long TChain, in parallel.

    A TChain with more than one file is split into one-file pieces that are
    processed by `AddTMVA` through `WorkManager`; anything else is passed
    to the sequential `ostap.tools.tmva.addTMVAResponse`.

    `inputs`, `weights_files`, `prefix`, `suffix`, `verbose` and `aux` are
    forwarded unchanged; `options` is not forwarded by this function.
    The work manager is always created with `silent=False`.

    On the parallel path the branches that appeared in the rebuilt chain
    are reported via `logger.info`.

    Returns a new ROOT.TChain with the same name and files (parallel path)
    or the result of the sequential call.

    >>> tar_file = trainer.tar_file
    >>> dataset  = ...
    >>> inputs = [ 'var1' , 'var2' , 'var2' ]
    >>> dataset.addTMVAResponse (  inputs , tar_file , prefix = 'tmva_' )
    """
    from ostap.tools.tmva import addTMVAResponse as _add_response_

    ## settings shared by the sequential call and the parallel task
    settings = dict(inputs=inputs,
                    weights_files=weights_files,
                    prefix=prefix,
                    suffix=suffix,
                    verbose=verbose,
                    aux=aux)

    multi_file = isinstance(chain, ROOT.TChain) and len(chain.files()) > 1
    if not multi_file:
        return _add_response_(dataset=chain, **settings)

    from ostap.trees.trees import Chain
    wrapped = Chain(chain)
    ## remember the branches present before the processing
    known = set(chain.branches())

    ## create the task
    job = AddTMVA(**settings)

    manager = WorkManager(silent=False, **kwargs)
    pieces = wrapped.split(max_files=1)

    manager.process(job, pieces)

    ## rebuild the chain from the same files
    updated = ROOT.TChain(chain.name)
    for file_name in wrapped.files:
        updated.Add(file_name)

    added = list(set(updated.branches()) - known)
    if added:
        report = updated.table(variables=added, prefix='# ')
        logger.info('Added branches:\n%s' % report)

    return updated
Ejemplo n.º 7
0
def pprocess(
        chain,
        selector,
        nevents=-1,
        first=0,
        shortcut=True,  ## important (not used in this function body)
        chunk_size=100000,  ## important
        max_files=5,
        ppservers=(),
        use_frame=20000,  ## important
        silent=False):
    """Fill the selector's dataset from a long chain/tree in parallel.

    - `chain`      : the chain/tree to process
    - `selector`   : object providing `selection`, `variables`,
                     `really_trivial`, `morecuts`, `name` and `cuts()`
    - `nevents`    : number of entries to use (negative: all)
    - `first`      : first entry to use
    - `chunk_size` : chunk size for the split; reset to -1 when the whole
                     multi-file chain is processed with a trivial configuration
    - `max_files`  : maximal number of files per chunk
    - `ppservers`  : forwarded to `WorkManager`
    - `use_frame`  : forwarded to `FillTask`
    - `silent`     : forwarded to `WorkManager`

    The merged dataset and statistics are stored in `selector.data` and
    `selector.stat`, and a summary line is logged. Always returns 1.

    >>> chain    = ...
    >>> selector =  ...
    >>> chain.pprocess ( selector )
    """

    from ostap.trees.trees import Chain

    wrapped = Chain(chain)

    selection = selector.selection
    variables = selector.variables

    trivial = selector.really_trivial and not selector.morecuts

    ## is the whole chain requested?
    whole_chain = first == 0 and (nevents < 0 or len(chain) <= nevents)

    if whole_chain and trivial and len(wrapped.files) > 1:
        logger.info(
            "Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    job = FillTask(variables, selection, trivial, use_frame)
    manager = WorkManager(ppservers=ppservers, silent=silent)
    pieces = wrapped.split(chunk_size=chunk_size, max_files=max_files)
    manager.process(job, pieces)
    del pieces

    dataset, stat = job.results()

    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat.skipped
    if stat.skipped:
        skipped = '/' + attention(skipped)
    else:
        skipped = ''

    summary = 'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n# %s' % (
        selector.name, stat.processed, stat.total, skipped, selector.cuts(),
        dataset)
    logger.info(summary)

    return 1
Ejemplo n.º 8
0
def pStatVar(chain,
             what,
             cuts='',
             nevents=-1,
             first=0,
             chunk_size=250000,
             max_files=1,
             silent=True,
             **kwargs):
    """Get statistics for a variable from a long chain/tree in parallel.

    - `chain`      : the chain/tree to process
    - `what`       : variable/expression to get the statistics for
    - `cuts`       : selection/weighting expression
    - `nevents`    : number of entries to use (non-positive: up to `n_large`)
    - `first`      : first entry to use
    - `chunk_size` : chunk size for the split, also the threshold below
                     which the sequential `chain.statVar` is used
    - `max_files`  : maximal number of files per chunk
    - `silent`     : forwarded to `WorkManager`
    - `kwargs`     : extra keyword arguments forwarded to `WorkManager`

    Small requests are served directly by `chain.statVar`:
    - fewer than `chunk_size` entries are explicitly requested,
    - a single-file TChain with fewer than `chunk_size` entries,
    - a plain TTree with fewer than `chunk_size` entries.
    Otherwise the chain is split and processed by `StatVarTask`.

    Returns the result of `chain.statVar` or of `task.results()`.

    >>> chain    = ...
    >>> chain.pstatVar( 'mass' , 'pt>1')
    """
    ## upper bound of the entry range, capped by n_large
    last = min(n_large, first + nevents if 0 < nevents else n_large)

    ## few special/trivial cases: no need for parallel processing
    if 0 <= first and 0 < nevents < chunk_size:
        return chain.statVar(what, cuts, first, last)
    elif isinstance(chain, ROOT.TChain):
        if 1 == chain.nFiles() and len(chain) < chunk_size:
            return chain.statVar(what, cuts, first, last)
    elif isinstance(chain, ROOT.TTree) and len(chain) < chunk_size:
        return chain.statVar(what, cuts, first, last)

    from ostap.trees.trees import Chain
    ch = Chain(chain, first=first, nevents=nevents)

    task = StatVarTask(what, cuts)
    wmgr = WorkManager(silent=silent, **kwargs)

    trees = ch.split(chunk_size=chunk_size, max_files=max_files)

    wmgr.process(task, trees)

    del trees
    del ch

    results = task.results()

    return results
Ejemplo n.º 9
0
def _pprocess_(chain,
               selector,
               nevents=-1,
               first=0,
               shortcut=True,
               chunk_size=100000,
               ppservers=(),
               silent=False):
    """Fill the selector's dataset from a long chain/tree in parallel.

    - `chain`      : the chain/tree to process
    - `selector`   : object providing `selection`, `variables`, `trivial`,
                     `name` and `cuts()`
    - `nevents`    : number of entries to use (negative: all)
    - `first`      : first entry to use
    - `shortcut`   : not used in this function body
    - `chunk_size` : chunk size for the split; reset to -1 when the whole
                     multi-file chain is processed with a trivial configuration
    - `ppservers`  : forwarded to the work manager
    - `silent`     : forwarded to the work manager

    The task output `(dataset, stat)` is stored in `selector.data` and
    `selector.stat`, and a summary line is logged. Always returns 1.

    >>> chain    = ...
    >>> selector =  ...
    >>> chain.pprocess ( selector )
    """

    from ostap.trees.trees import Chain

    wrapped = Chain(chain)

    selection = selector.selection
    variables = selector.variables
    trivial = selector.trivial

    ## is the whole chain requested?
    whole_chain = first == 0 and (nevents < 0 or len(chain) <= nevents)

    if whole_chain and trivial and len(wrapped.files) > 1:
        logger.info(
            "Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    job = FillTask(variables, selection, trivial)
    manager = Parallel.WorkManager(ppservers=ppservers, silent=silent)
    pieces = wrapped.split(chunk_size=chunk_size)
    manager.process(job, pieces)

    dataset, stat = job.output

    selector.data = dataset
    selector.stat = stat

    ## stat is indexed as: [0] total, [1] processed, [2] skipped
    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat[2]
    if stat[2]:
        skipped = '/' + attention(skipped)
    else:
        skipped = ''

    summary = 'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n# %s' % (
        selector.name, stat[1], stat[0], skipped, selector.cuts(), dataset)
    logger.info(summary)

    return 1
Ejemplo n.º 10
0
def pStatVar(chain,
             what,
             cuts='',
             nevents=-1,
             first=0,
             chunk_size=100000,
             max_files=10,
             ppservers=(),
             silent=True):
    """Get statistics for a variable from a long chain/tree in parallel.

    - `chain`      : the chain/tree to process
    - `what`       : variable/expression to get the statistics for
    - `cuts`       : selection/weighting expression
    - `nevents`    : number of entries to use (non-positive: up to `n_large`)
    - `first`      : first entry to use
    - `chunk_size` : chunk size for the split, also the threshold below
                     which the sequential `chain.statVar` is used
    - `max_files`  : maximal number of files per chunk
    - `ppservers`  : forwarded to `WorkManager`
    - `silent`     : forwarded to `WorkManager`

    Small requests are served directly by `chain.statVar`:
    - fewer than `chunk_size` entries are explicitly requested,
    - a TChain with fewer than 5 files and fewer than `chunk_size` entries,
    - a plain TTree with fewer than `chunk_size` entries.
    Otherwise the chain is split and processed by `StatVarTask`.

    Returns the result of `chain.statVar` or of `task.results()`.

    >>> chain    = ...
    >>> chain.pstatVar( 'mass' , 'pt>1')
    """

    ## upper bound of the entry range, capped by n_large
    last = min(n_large, first + nevents if 0 < nevents else n_large)

    ## decide whether the sequential processing is good enough
    if 0 <= first and 0 < nevents < chunk_size:
        sequential = True
    elif isinstance(chain, ROOT.TChain):
        sequential = chain.nFiles() < 5 and len(chain) < chunk_size
    elif isinstance(chain, ROOT.TTree):
        sequential = len(chain) < chunk_size
    else:
        sequential = False

    if sequential:
        return chain.statVar(what, cuts, first, last)

    from ostap.trees.trees import Chain
    wrapped = Chain(chain, first=first, nevents=nevents)

    job = StatVarTask(what, cuts)
    manager = WorkManager(ppservers=ppservers, silent=silent)

    pieces = wrapped.split(chunk_size=chunk_size, max_files=max_files)

    manager.process(job, pieces)

    del pieces
    del wrapped

    return job.results()
Ejemplo n.º 11
0
def reduce(tree,
           selection,
           save_vars=(),
           new_vars={},
           no_vars=(),
           output='',
           name='',
           addselvars=False,
           silent=False):
    """ Powerful method to reduce/transform the tree/chain.
    It relies on Ostap.DataFrame ( alias for ROOT.ROOT.DataFrame) and allows
    - filter entries from TTree/TChain
    - add new columns
    - remove unnecessary columns

    All arguments are forwarded unchanged to `ReduceTree`:
    - `selection`  : selection to apply
    - `save_vars`  : variables to keep
    - `new_vars`   : new variables to add ( name -> expression )
    - `no_vars`    : variables to drop
    - `output`     : output file; when empty, the file produced by
                     `ReduceTree` is registered in the trash of the result
    - `name`       : name passed to `ReduceTree`
    - `addselvars` : passed to `ReduceTree`
    - `silent`     : passed to `ReduceTree`; it also selects which summary
                     is logged: the full `ReduceTree` report when false,
                     a compact size summary when true

    Returns the `Chain` built from the reduced chain.

    >>> tree = ....
    >>> reduced1 = tree.reduce  ( 'pt>1' )
    >>> reduced2 = tree.reduce  ( 'pt>1' , save_vars = [ 'p', 'pt' ,'q' ] )
    >>> reduced3 = tree.reduce  ( 'pt>1' , no_vars = [ 'Q', 'z' ,'x' ] )
    >>> reduced4 = tree.reduce  ( 'pt>1' , new_vars = { 'pt2' : 'pt*pt' } )
    >>> reduced5 = tree.reduce  ( 'pt>1' , new_vars = { 'pt2' : 'pt*pt' } , output = 'OUTPUT.root' )

    """

    ## size of the input: (branches x entries)
    nb0 = len(tree.branches())
    ne0 = len(tree)

    reduced = ReduceTree(tree,
                         selection=selection,
                         save_vars=save_vars,
                         new_vars=new_vars,
                         no_vars=no_vars,
                         output=output,
                         name=name,
                         addselvars=addselvars,
                         tmp_keep=True,
                         silent=silent)

    from ostap.trees.trees import Chain

    result = Chain(reduced.chain)
    if not output: result.trash.add(reduced.output)

    if not silent:
        logger.info('Reduce: %s' % str(reduced))
    else:
        nb = len(result.chain.branches())
        ne = len(result.chain)
        size0 = nb0 * ne0
        size = nb * ne
        ## the selection may leave nothing: avoid ZeroDivisionError
        if size:
            f = float(size0) / size
        else:
            f = float('inf') if size0 else 1.0
        logger.info('reduce: (%dx%d) -> (%dx%d) %.1f (branches x entries) ' %
                    (nb0, ne0, nb, ne, f))

    return result
Ejemplo n.º 12
0
    def process(self, jobid, item):
        """Project one chunk of the input onto a clone of the template histogram.

        - `jobid` : job identifier (not used in this method)
        - `item`  : chunk description providing `name`, `files`, `first`
                    and `nevents`

        The chunk is rebuilt as a `Chain`; `self.what` is projected with
        `self.cuts` onto a clone of `self.histo` using the regular
        `_tt_project_`. The result is stored as the task output and returned.
        """

        import ROOT
        from ostap.logger.utils import logWarning
        with logWarning():
            ## presumably imported for their import-time side effects -- confirm
            import ostap.core.pyrouts
            import ostap.trees.trees
            import ostap.histos.histos
            import ostap.frames.frames
            from ostap.trees.trees import Chain, Tree

        chunk = Chain(name=item.name,
                      files=item.files,
                      first=item.first,
                      nevents=item.nevents)

        tree = chunk.chain
        start = chunk.first
        count = chunk.nevents

        ## use the regular projection
        from ostap.trees.trees import _tt_project_
        from ostap.core.core import ROOTCWD

        with ROOTCWD():

            ## the output histogram is created after switching to gROOT
            ROOT.gROOT.cd()
            target = self.histo.Clone()

            ## default output, in place until the projection completes
            self.__output = 0, target

            self.__output = _tt_project_(tree=tree,
                                         histo=target,
                                         what=self.what,
                                         cuts=self.cuts,
                                         options='',
                                         nentries=count,
                                         firstentry=start)
        del item

        return self.__output
Ejemplo n.º 13
0
    def process(self, jobid, item):
        """Reduce one chunk of the input with `ReduceTree`.

        - `jobid` : job identifier (not used in this method)
        - `item`  : chunk description providing the `chain` to reduce

        The selection and the variable settings are taken from the task
        attributes (`selection`, `save_vars`, `new_vars`, `addselvars`,
        `name`).

        Returns a pair: a `Chain` built from the name of the reduced chain
        and its output file, and the `table` of the `ReduceTree` object.
        """

        import ROOT
        import ostap.core.pyrouts
        from ostap.trees.trees import Chain
        from ostap.frames.tree_reduce import ReduceTree

        ## settings for the reduction, taken from the task
        settings = dict(selection=self.selection,
                        save_vars=self.save_vars,
                        new_vars=self.new_vars,
                        addselvars=self.addselvars,
                        name=self.name,
                        tmp_keep=True,  ## attention! True is here!
                        silent=True)

        reduced = ReduceTree(item.chain, **settings)

        reduced_name = reduced.chain.GetName()
        reduced_file = reduced.output

        return Chain(name=reduced_name, files=[reduced_file]), reduced.table
Ejemplo n.º 14
0
def parallel_fill(chain,
                  selector,
                  nevents=-1,
                  first=0,
                  shortcut=True,  ## important (not used in this function body)
                  chunk_size=1000000,  ## important
                  max_files=5,
                  use_frame=20000,  ## important
                  silent=False,
                  job_chunk=-1, **kwargs):
    """Fill the selector's dataset from a long chain/tree in parallel.

    - `chain`      : the chain/tree to process
    - `selector`   : a `SelectorWithVars` instance
    - `nevents`    : number of entries to use (negative: all)
    - `first`      : first entry to use
    - `chunk_size` : chunk size for the split; reset to -1 when the whole
                     multi-file chain is processed with a trivial configuration
    - `max_files`  : maximal number of files per chunk
    - `use_frame`  : forwarded to `FillTask`
    - `silent`     : forwarded to `WorkManager`
    - `job_chunk`  : forwarded to `WorkManager.process` as `chunk_size`
    - `kwargs`     : extra keyword arguments forwarded to `WorkManager`

    The merged dataset and statistics are stored in `selector.data` and
    `selector.stat`, a summary is logged, and the pair `(dataset, stat)`
    is returned. An AssertionError is raised for a wrong `selector` type.

    >>> chain    = ...
    >>> selector =  ...
    >>> chain.pprocess ( selector )
    """
    import ostap.fitting.roofit
    from ostap.fitting.pyselectors import SelectorWithVars
    from ostap.trees.trees import Chain

    assert isinstance(selector, SelectorWithVars), \
        "Invalid type of ``selector'': %s" % type(selector)

    wrapped = Chain(chain)

    selection = selector.selection
    variables = selector.variables
    roo_cuts = selector.roo_cuts

    trivial = selector.really_trivial and not selector.morecuts

    ## is the whole chain requested?
    whole_chain = first == 0 and (nevents < 0 or len(chain) <= nevents)

    if whole_chain and trivial and len(wrapped.files) > 1:
        logger.info("Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    job = FillTask(variables=variables,
                   selection=selection,
                   roo_cuts=roo_cuts,
                   trivial=trivial,
                   use_frame=use_frame)

    manager = WorkManager(silent=silent, **kwargs)
    pieces = wrapped.split(chunk_size=chunk_size, max_files=max_files)
    manager.process(job, pieces, chunk_size=job_chunk)
    del pieces

    dataset, stat = job.results()

    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat.skipped
    if stat.skipped:
        skipped = '/' + attention(skipped)
    else:
        skipped = ''

    summary = 'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n%s' % (
        selector.name,
        stat.processed,
        stat.total,
        skipped,
        selector.cuts(),
        dataset.table(prefix='# '))
    logger.info(summary)

    return dataset, stat
Ejemplo n.º 15
0
    def __init__(
        self,
        category,  ## accessor to category 
        N,  ## number of categories 
        methods,  ## list of TMVA methods
        variables,  ## list of variables 
        signal,  ## signal tree
        background,  ## background tree
        signal_cuts='',  ## signal cuts 
        background_cuts='',  ## background cuts 
        spectators=[],
        bookingoptions="Transformations=I;D;P;G,D",
        configuration="nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V",
        signal_weight=None,
        background_weight=None,
        name='TMVAChopper',  ## the name 
        verbose=False,  ## verbose ? 
        chop_signal=False,  ## chop the signal     ?
        chop_background=True):  ## chop the background ?
        """Create the TMVA ``chopping'' trainer.

        The constructor stores the configuration, fills the histograms with
        the population of the categories `(category)%N` for each sample
        that is chopped (a warning is logged when some category is empty),
        and finally re-wraps the signal and background trees as `Chain`.

        An AssertionError is raised when `N` is not an integer larger than 1
        or when neither signal nor background is chopped.

        >>> N = 11 
        >>> trainer = Trainer (
        ... category = '137*evt+813*run' ,
        ... N        = N                 , 
        ... methods =  [ # type                   name   configuration
        ...      ( ROOT.TMVA.Types.kMLP        , 'MLP'        , 'H:!V:EstimatorType=CE:VarTransform=N:NCycles=200:HiddenLayers=N+3:TestRate=5:!UseRegulator' ) ,
        ...      ( ROOT.TMVA.Types.kBDT        , 'BDTG'       , 'H:!V:NTrees=100:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2' ) , 
        ...      ( ROOT.TMVA.Types.kCuts       , 'Cuts'       , 'H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart' ) ,
        ...      ( ROOT.TMVA.Types.kFisher     , 'Fisher'     , 'H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10' ),
        ...      ( ROOT.TMVA.Types.kLikelihood , 'Likelihood' , 'H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50' ) ] ,
        ... variables  = [ 'var1' , 'var2' ,  'var3' ] ,  ## Variables to use in the training
        ... signal     = signal_tree      , ## TTree/TChain with ``signal'' sample   
        ... background = background_tree  , ## TTree/TChain with ``background'' sample   
        ... name       = 'TMVAChopper'    ,
        ... verbose    = False )
        
        """
        assert isinstance(N, (int, long)) and 1 < N, \
            "Invalid number of categories"

        self.__chop_signal = bool(chop_signal)
        self.__chop_background = bool(chop_background)

        assert self.__chop_signal or self.__chop_background, \
            "Neither signal nor background chopping"

        self.__category = category
        self.__N = N

        ## raw trees: they are used for the projections below
        self.__signal = signal
        self.__background = background

        self.__methods = tuple(methods)
        self.__signal_weight = signal_weight
        self.__signal_cuts = ROOT.TCut(signal_cuts)

        self.__background_weight = background_weight
        self.__background_cuts = ROOT.TCut(background_cuts)

        self.__variables = tuple(variables)
        self.__spectators = tuple(spectators)

        self.__bookingoptions = bookingoptions
        self.__configuration = configuration

        self.__name = name
        self.__verbose = bool(verbose)

        self.__sig_histos = ()
        self.__bkg_histos = ()

        ## the expression for the category index
        cat = '(%s)%%%d' % (self.category, self.N)

        if self.chop_signal:
            sig_fine = ROOT.TH1F(hID(), 'Signal categories', self.N * 5, -0.5,
                                 self.N - 1)
            sig_edges = [-0.5 + i for i in range(self.N + 1)]
            ## one bin per category
            sig_cats = h1_axis(sig_edges, title=sig_fine.GetTitle())
            self.signal.project(sig_fine, cat, self.signal_cuts)
            self.signal.project(sig_cats, cat, self.signal_cuts)
            self.__sig_histos = sig_fine, sig_cats
            sig_stat = sig_cats.stat()
            if 0 >= sig_stat.min():
                logger.warning("Some signal categories are empty!")
            logger.info('Signal     category population mean/rms: %s/%s' %
                        (sig_stat.mean(), sig_stat.rms()))

        if self.chop_background:
            bkg_fine = ROOT.TH1F(hID(), 'Background categories', self.N * 5,
                                 -0.5, self.N - 1)
            bkg_edges = [-0.5 + i for i in range(self.N + 1)]
            ## one bin per category
            bkg_cats = h1_axis(bkg_edges, title=bkg_fine.GetTitle())
            self.background.project(bkg_fine, cat, self.background_cuts)
            self.background.project(bkg_cats, cat, self.background_cuts)
            self.__bkg_histos = bkg_fine, bkg_cats
            bkg_stat = bkg_cats.stat()
            if 0 >= bkg_stat.min():
                logger.warning("Some background categories are empty!")
            logger.info('Background category population mean/rms: %s/%s' %
                        (bkg_stat.mean(), bkg_stat.rms()))

        ## the trees are re-wrapped as Chain only after the projections above
        ## (the original comment calls it a "trick to please Kisa")
        from ostap.trees.trees import Chain
        self.__signal = Chain(signal)
        self.__background = Chain(background)

        ## book the trainers: nothing is trained or produced yet
        self.__trainers = ()
        self.__weights_files = []
        self.__class_files = []
        self.__output_files = []
        self.__tar_file = None
        self.__log_file = None
Ejemplo n.º 16
0
def reduce(chain,
           selection={},
           save_vars=(),
           new_vars={},
           no_vars=(),
           output='',
           name='',
           addselvars=False,
           silent=False,
           **kwargs):
    """ Reduce/transform a long chain in parallel, one file per job.

    - `chain`      : the chain to reduce
    - `selection`  : selection to apply
    - `save_vars`  : variables to keep
    - `new_vars`   : new variables to add
    - `no_vars`    : variables to drop; NOTE(review): it is forwarded only
                     on the sequential path, `ReduceTask` does not get it
    - `output`     : output file; when given, the per-file results are merged
                     into it with `ReduceTree`
    - `name`       : name passed to the reduction
    - `addselvars` : passed to the reduction
    - `silent`     : forwarded to `WorkManager`; when false, the report
                     table and the size summary are logged
    - `kwargs`     : extra keyword arguments forwarded to `WorkManager`

    A TChain with at most one file is delegated to the sequential
    `chain.reduce`. Otherwise the chain is split into one-file pieces
    that are processed by `ReduceTask`; the files of the merged result
    are registered in its trash.

    Returns the reduced `Chain` (or the result of `chain.reduce`).

    >>> chain   = ...
    >>> reduced = reduce ( chain , selection = 'pt>1' )
    """

    from ostap.trees.trees import Chain
    from ostap.frames.tree_reduce import ReduceTree

    ## nothing to parallelise: use the sequential implementation
    if isinstance(chain, ROOT.TChain) and 1 >= len(chain.files()):
        return chain.reduce(selection=selection,
                            save_vars=save_vars,
                            new_vars=new_vars,
                            no_vars=no_vars,
                            output=output,
                            name=name,
                            addselvars=addselvars,
                            silent=silent)

    ## size of the input: (branches x entries)
    nb0 = len(chain.branches())
    ne0 = len(chain)

    ch = Chain(chain)

    task = ReduceTask(selection=selection,
                      save_vars=save_vars,
                      new_vars=new_vars,
                      addselvars=addselvars,
                      name=name)

    wmgr = WorkManager(silent=silent, **kwargs)
    trees = ch.split(max_files=1)
    wmgr.process(task, trees)

    result, table = task.results()
    for i in result.files:
        result.trash.add(i)

    if output:  ## merge results into single output file
        reduced = ReduceTree(result.chain,
                             selection='',
                             save_vars=(),
                             addselvars=False,
                             silent=True,
                             output=output,
                             name=name)

        result = Chain(reduced.chain)

    if not silent:
        from ostap.frames.frames import report_print_table
        title = 'Tree -> Frame -> Tree filter/transformation'
        logger.info('Reduce tree:\n%s' %
                    report_print_table(table, title, '# '))

        nb = len(result.chain.branches())
        ne = len(result.chain)
        size0 = nb0 * ne0
        size = nb * ne
        ## the selection may leave nothing: avoid ZeroDivisionError
        if size:
            f = float(size0) / size
        else:
            f = float('inf') if size0 else 1.0
        logger.info('reduce: (%dx%d) -> (%dx%d) %.1f (branches x entries) ' %
                    (nb0, ne0, nb, ne, f))

    return result