Ejemplo n.º 1
0
def add_new_branch(chain, branch_name, function, verbose=True):
    """Add a new branch to a long chain, handling its files in parallel.

    A ``ROOT.TChain`` with more than one file is wrapped into ``Chain``,
    split into one-file pieces and processed by a ``WorkManager`` running an
    ``AddBranch`` task.  Any other ``ROOT.TTree`` is handed to the sequential
    ``ostap.trees.trees.add_new_branch``.

    - see ROOT.TTree.add_new_branch

    >>> chain = ....
    >>> chain.padd_new_branch ( 'new_branch' , 'px*py' )

    Parameters:
        chain: tree or chain to be updated.
        branch_name: forwarded to ``AddBranch`` / the sequential helper.
        function: forwarded to ``AddBranch`` / the sequential helper.
        verbose: the work manager is created with ``silent=not verbose``.
            NOTE: the sequential helper is always called with
            ``verbose=False``, regardless of this flag.

    Returns:
        The result of the sequential helper, or (parallel path) a new
        ``ROOT.TChain`` named ``chain.name`` built over the same files.
    """
    from ostap.trees.trees import Chain
    from ostap.trees.trees import add_new_branch as _add_branch_

    multi_file = isinstance(chain, ROOT.TChain) and 1 < len(chain.files())
    if not multi_file and isinstance(chain, ROOT.TTree):
        # nothing to parallelise: use the sequential helper
        return _add_branch_(chain, branch_name, function, verbose=False)

    splitter = Chain(chain)
    job = AddBranch(branch_name, function)
    manager = WorkManager(silent=not verbose)

    # one file per job
    manager.process(job, splitter.split(max_files=1))

    # build a fresh chain over the very same files
    updated = ROOT.TChain(chain.name)
    for file_name in splitter.files:
        updated.Add(file_name)

    return updated
Ejemplo n.º 2
0
def cproject(chain, histo, what, cuts,
             nentries=-1, first=0, chunk_size=1000000, silent=False):
    """Project a long chain into a histogram using parallel workers.

    The chain is wrapped into ``Chain`` (honouring ``first``/``nentries``),
    split into pieces of ``chunk_size`` and processed by a
    ``Parallel.WorkManager`` running a ``ProjectTask``.  The second element
    of the task output is added in place to ``histo``.

    >>> chain = ... ## large chain
    >>> histo = ... ## histogram template
    >>> cproject ( chain , histo , 'mass' , 'pt>10' )

    The original notes also list the chain-method spellings ``ppropject``
    and ``cpropject``; those look like typos - confirm the real method names.
    The original author reports a speed-up factor of about 8 on a 12-core
    machine.

    Parameters:
        chain: the chain to be projected.
        histo: histogram template; updated in place with ``+=``.
        what, cuts: forwarded to ``ProjectTask``.
        nentries: passed to ``Chain`` as ``nevents``.
        first: passed to ``Chain`` as ``first``.
        chunk_size: passed to ``Chain.split``.
        silent: passed to the work manager.

    Returns:
        The pair ``(filtered, histo)`` where ``filtered`` is the first
        element of the task output.
    """
    from ostap.trees.trees import Chain

    source = Chain(chain, first=first, nevents=nentries)

    job = ProjectTask(histo, what, cuts)
    manager = Parallel.WorkManager(silent=silent)
    manager.process(job, source.split(chunk_size=chunk_size))

    # task output: (filtered, partial histogram)
    filtered = job.output[0]
    histo += job.output[1]

    return filtered, histo
Ejemplo n.º 3
0
def add_new_branch(chain, branch_name, function, verbose=True):
    """Add a new branch to a long chain in parallel and report what was added.

    A ``ROOT.TChain`` with more than one file is wrapped into ``Chain``,
    split into one-file pieces and processed by a ``WorkManager`` running an
    ``AddBranch`` task.  Any other ``ROOT.TTree`` is handed to the sequential
    ``ostap.trees.trees.add_new_branch``.

    - see ROOT.TTree.add_new_branch

    >>> chain = ....
    >>> chain.padd_new_branch ( 'new_branch' , 'px*py' )

    Parameters:
        chain: tree or chain to be updated.
        branch_name: forwarded to ``AddBranch`` / the sequential helper.
        function: forwarded to ``AddBranch`` / the sequential helper.
        verbose: the work manager is created with ``silent=not verbose``.
            NOTE: the sequential helper is always called with
            ``verbose=False``, regardless of this flag.

    Returns:
        The result of the sequential helper, or (parallel path) a new
        ``ROOT.TChain`` named ``chain.name`` built over the same files.
    """
    from ostap.trees.trees import Chain
    from ostap.trees.trees import add_new_branch as _add_branch_

    multi_file = isinstance(chain, ROOT.TChain) and 1 < len(chain.files())
    if not multi_file and isinstance(chain, ROOT.TTree):
        # nothing to parallelise: use the sequential helper
        return _add_branch_(chain, branch_name, function, verbose=False)

    splitter = Chain(chain)
    # remember the branches present before processing
    known = set(chain.branches())

    job = AddBranch(branch_name, function)
    manager = WorkManager(silent=not verbose)

    # one file per job
    manager.process(job, splitter.split(max_files=1))

    # build a fresh chain over the very same files
    updated = ROOT.TChain(chain.name)
    for file_name in splitter.files:
        updated.Add(file_name)

    # log only the branches that were not there before
    added = list(set(updated.branches()) - known)
    if added:
        report = updated.table(variables=added, prefix='# ')
        logger.info('Added branches:\n%s' % report)

    return updated
Ejemplo n.º 4
0
def cproject(chain, histo, what, cuts,
             nentries=-1, first=0, chunk_size=-1, max_files=5,
             silent=False, **kwargs):
    """Project a long chain into a histogram using parallel workers.

    The chain is wrapped into ``Chain`` (honouring ``first``/``nentries``),
    split according to ``chunk_size``/``max_files`` and processed by a
    ``WorkManager`` running a ``ProjectTask``.  The partial histogram
    returned by the task is added in place to ``histo``.

    >>> chain = ... ## large chain
    >>> histo = ... ## histogram template
    >>> cproject ( chain , histo , 'mass' , 'pt>10' )

    The original notes also list the chain-method spellings ``ppropject``
    and ``cpropject``; those look like typos - confirm the real method names.
    The original author reports a speed-up factor of about 8 on a 12-core
    machine.

    Parameters:
        chain: the chain to be projected.
        histo: histogram template; updated in place with ``+=``.
        what, cuts: forwarded to ``ProjectTask``.
        nentries: passed to ``Chain`` as ``nevents``.
        first: passed to ``Chain`` as ``first``.
        chunk_size, max_files: passed to ``Chain.split``.
        silent: passed to the work manager.
        **kwargs: extra keyword arguments for the ``WorkManager``.

    Returns:
        The pair ``(filtered, histo)`` where ``filtered`` is the first
        element of ``task.results()``.
    """
    from ostap.trees.trees import Chain

    source = Chain(chain, first=first, nevents=nentries)

    job = ProjectTask(histo, what, cuts)
    manager = WorkManager(silent=silent, **kwargs)
    manager.process(job,
                    source.split(chunk_size=chunk_size, max_files=max_files))

    # task results: (filtered, partial histogram)
    filtered, partial = job.results()
    histo += partial
    del partial

    return filtered, histo
Ejemplo n.º 5
0
def addChoppingResponse(
        chain,  ## input dataset to be updated
        chopper,  ## chopping category/formula
        N,  ## number of categories
        inputs,  ## input variables
        weights_files,  ## files with TMVA weights (tar/gz or xml)
        category_name='chopping',  ## category name
        prefix='tmva_',  ## prefix for TMVA-variable
        suffix='_response',  ## suffix for TMVA-variable
        options='',  ## TMVA-reader options
        verbose=True,  ## verbosity flag
        aux=0.9):
    """Add the TMVA/chopping response to a dataset, in parallel for long chains.

    A ``ROOT.TChain`` with more than one file is split into one-file pieces
    that are processed by a ``WorkManager`` running an ``AddChopping`` task.
    Everything else is delegated to
    ``ostap.tools.chopping.addChoppingResponse``.

    >>> tar_file = trainer.tar_file
    >>> chain    = ...
    >>> inputs   = [ 'var1' , 'var2' , 'var2' ] ## input variables to TMVA
    >>> addChoppingResponse ( chain , chopper , N , inputs , tar_file , prefix = 'tmva_' )

    NOTE(review): ``category_name`` is accepted but is forwarded neither to
    the sequential helper nor to ``AddChopping`` - confirm whether that is
    intended.

    Returns:
        The result of the sequential helper, or (parallel path) a new
        ``ROOT.TChain`` named ``chain.name`` built over the same files.
    """
    from ostap.tools.chopping import addChoppingResponse as _add_response_

    # configuration shared by the sequential helper and the parallel task
    config = dict(chopper=chopper,
                  N=N,
                  inputs=inputs,
                  weights_files=weights_files,
                  prefix=prefix,
                  suffix=suffix,
                  options=options,
                  verbose=verbose,
                  aux=aux)

    multi_file = isinstance(chain, ROOT.TChain) and 1 < len(chain.files())
    if not multi_file:
        return _add_response_(dataset=chain, **config)

    from ostap.trees.trees import Chain
    splitter = Chain(chain)

    job = AddChopping(**config)
    manager = WorkManager(silent=False)

    # one file per job
    manager.process(job, splitter.split(max_files=1))

    # build a fresh chain over the very same files
    updated = ROOT.TChain(chain.name)
    for file_name in splitter.files:
        updated.Add(file_name)

    return updated
Ejemplo n.º 6
0
def addTMVAResponse(
        chain,  ## input chain
        inputs,  ## input variables
        weights_files,  ## files with TMVA weights (tar/gz or xml)
        prefix='tmva_',  ## prefix for TMVA-variable
        suffix='_response',  ## suffix for TMVA-variable
        options='',  ## TMVA-reader options
        verbose=True,  ## verbosity flag
        aux=0.9,  ## original note: "for Cuts method : efficiency cut-off"
        **kwargs):  ## extra arguments for the WorkManager
    """Add the TMVA response to a long TChain, processing its files in parallel.

    A ``ROOT.TChain`` with more than one file is split into one-file pieces
    that are processed by a ``WorkManager`` running an ``AddTMVA`` task; the
    newly appeared branches are then logged.  Everything else is delegated
    to ``ostap.tools.tmva.addTMVAResponse``.

    >>> tar_file = trainer.tar_file
    >>> dataset  = ...
    >>> inputs = [ 'var1' , 'var2' , 'var2' ]
    >>> dataset.addTMVAResponse (  inputs , tar_file , prefix = 'tmva_' )

    NOTE(review): ``options`` is accepted but is forwarded neither to the
    sequential helper nor to ``AddTMVA`` - confirm whether that is intended.
    ``**kwargs`` reach only the ``WorkManager`` of the parallel path.

    Returns:
        The result of the sequential helper, or (parallel path) a new
        ``ROOT.TChain`` named ``chain.name`` built over the same files.
    """
    from ostap.tools.tmva import addTMVAResponse as _add_response_

    # configuration shared by the sequential helper and the parallel task
    config = dict(inputs=inputs,
                  weights_files=weights_files,
                  prefix=prefix,
                  suffix=suffix,
                  verbose=verbose,
                  aux=aux)

    multi_file = isinstance(chain, ROOT.TChain) and 1 < len(chain.files())
    if not multi_file:
        return _add_response_(dataset=chain, **config)

    from ostap.trees.trees import Chain
    splitter = Chain(chain)
    # remember the branches present before processing
    known = set(chain.branches())

    job = AddTMVA(**config)
    manager = WorkManager(silent=False, **kwargs)

    # one file per job
    manager.process(job, splitter.split(max_files=1))

    # build a fresh chain over the very same files
    updated = ROOT.TChain(chain.name)
    for file_name in splitter.files:
        updated.Add(file_name)

    # log only the branches that were not there before
    added = list(set(updated.branches()) - known)
    if added:
        report = updated.table(variables=added, prefix='# ')
        logger.info('Added branches:\n%s' % report)

    return updated
Ejemplo n.º 7
0
def pprocess(chain,
             selector,
             nevents=-1,
             first=0,
             shortcut=True,  ## important
             chunk_size=100000,  ## important
             max_files=5,
             ppservers=(),
             use_frame=20000,  ## important
             silent=False):
    """Fill the selector's dataset from a long chain/tree in parallel.

    >>> chain    = ...
    >>> selector =  ...
    >>> chain.pprocess ( selector )

    The chain is wrapped into ``Chain``, split according to
    ``chunk_size``/``max_files`` and processed by a ``WorkManager`` running
    a ``FillTask``.  The resulting dataset and statistics are stored in
    ``selector.data`` and ``selector.stat`` and a summary line is logged.

    When the whole chain is requested, the selector is "really trivial"
    without extra cuts and the chain has more than one file, ``chunk_size``
    is reset to -1.

    Note: ``shortcut`` is accepted but not used in this function body.
    ``nevents`` and ``first`` only enter the "whole chain" test; the
    ``Chain`` wrapper is created without them.

    Returns:
        Always 1.
    """
    from ostap.trees.trees import Chain

    source = Chain(chain)

    selection = selector.selection
    variables = selector.variables
    trivial = selector.really_trivial and not selector.morecuts

    # is the complete chain requested?
    whole_chain = 0 == first and (0 > nevents or len(chain) <= nevents)

    if whole_chain and trivial and 1 < len(source.files):
        logger.info(
            "Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    job = FillTask(variables, selection, trivial, use_frame)
    manager = WorkManager(ppservers=ppservers, silent=silent)
    pieces = source.split(chunk_size=chunk_size, max_files=max_files)
    manager.process(job, pieces)
    del pieces

    dataset, stat = job.results()

    # publish the outcome through the selector
    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat.skipped
    if stat.skipped:
        skipped = '/' + attention(skipped)
    else:
        skipped = ''

    summary = 'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n# %s' % (
        selector.name, stat.processed, stat.total, skipped, selector.cuts(),
        dataset)
    logger.info(summary)

    return 1
Ejemplo n.º 8
0
def pStatVar(chain,
             what,
             cuts='',
             nevents=-1,
             first=0,
             chunk_size=250000,
             max_files=1,
             silent=True,
             **kwargs):
    """Compute ``statVar`` for a long chain/tree, in parallel when worthwhile.

    >>> chain    = ...
    >>> chain.pstatVar( 'mass' , 'pt>1')

    Small requests are served directly by ``chain.statVar``:

    - an explicit range with ``0 < nevents < chunk_size`` (and ``first >= 0``);
    - a ``ROOT.TChain`` with exactly one file and fewer than ``chunk_size``
      entries;
    - a plain ``ROOT.TTree`` with fewer than ``chunk_size`` entries.

    Otherwise the chain is wrapped into ``Chain``, split according to
    ``chunk_size``/``max_files`` and processed by a ``WorkManager`` running
    a ``StatVarTask``.

    Parameters:
        chain: tree or chain to be processed.
        what, cuts: forwarded to ``statVar`` / ``StatVarTask``.
        nevents: number of entries to use; non-positive means "up to
            ``n_large``".
        first: first entry to use.
        chunk_size, max_files: passed to ``Chain.split``.
        silent: passed to the work manager; when false a one-line summary
            (chain name, number of pieces) is printed before processing.
        **kwargs: extra keyword arguments for the ``WorkManager``.

    Returns:
        The value of ``chain.statVar(...)`` or of ``task.results()``.
    """
    ## few special/trivial cases
    last = min(n_large, first + nevents if 0 < nevents else n_large)

    if 0 <= first and 0 < nevents < chunk_size:
        small = True
    elif isinstance(chain, ROOT.TChain):
        small = 1 == chain.nFiles() and len(chain) < chunk_size
    elif isinstance(chain, ROOT.TTree):
        small = len(chain) < chunk_size
    else:
        small = False

    if small:
        return chain.statVar(what, cuts, first, last)

    from ostap.trees.trees import Chain
    ch = Chain(chain, first=first, nevents=nevents)

    task = StatVarTask(what, cuts)
    wmgr = WorkManager(silent=silent, **kwargs)

    trees = ch.split(chunk_size=chunk_size, max_files=max_files)

    # progress summary only on request: ``silent`` must really mean silent
    if not silent:
        print('statvar-pprocess', chain.GetName(), len(trees))

    wmgr.process(task, trees)

    del trees
    del ch

    return task.results()
Ejemplo n.º 9
0
def _pprocess_(chain, selector,
               nevents=-1, first=0, shortcut=True,
               chunk_size=100000, ppservers=(), silent=False):
    """Fill the selector's dataset from a long chain/tree in parallel.

    >>> chain    = ...
    >>> selector =  ...
    >>> chain.pprocess ( selector )

    The chain is wrapped into ``Chain``, split into pieces of ``chunk_size``
    and processed by a ``Parallel.WorkManager`` running a ``FillTask``.  The
    resulting dataset and statistics are stored in ``selector.data`` and
    ``selector.stat`` and a summary line is logged.

    When the whole chain is requested, the selector is trivial and the chain
    has more than one file, ``chunk_size`` is reset to -1.

    Note: ``shortcut`` is accepted but not used in this function body.
    ``nevents`` and ``first`` only enter the "whole chain" test; the
    ``Chain`` wrapper is created without them.

    Returns:
        Always 1.
    """
    from ostap.trees.trees import Chain

    source = Chain(chain)

    selection = selector.selection
    variables = selector.variables
    trivial = selector.trivial

    # is the complete chain requested?
    whole_chain = 0 == first and (0 > nevents or len(chain) <= nevents)

    if whole_chain and trivial and 1 < len(source.files):
        logger.info(
            "Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    job = FillTask(variables, selection, trivial)
    manager = Parallel.WorkManager(ppservers=ppservers, silent=silent)
    manager.process(job, source.split(chunk_size=chunk_size))

    dataset, stat = job.output

    # publish the outcome through the selector
    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    # stat is indexed as (total, processed, skipped) by the log line below
    skipped = 'Skipped:%d' % stat[2]
    if stat[2]:
        skipped = '/' + attention(skipped)
    else:
        skipped = ''

    summary = 'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n# %s' % (
        selector.name, stat[1], stat[0], skipped, selector.cuts(), dataset)
    logger.info(summary)

    return 1
Ejemplo n.º 10
0
def pStatVar(chain, what, cuts='',
             nevents=-1, first=0,
             chunk_size=100000, max_files=10,
             ppservers=(), silent=True):
    """Compute ``statVar`` for a long chain/tree, in parallel when worthwhile.

    >>> chain    = ...
    >>> chain.pstatVar( 'mass' , 'pt>1')

    Small requests are served directly by ``chain.statVar``:

    - an explicit range with ``0 < nevents < chunk_size`` (and ``first >= 0``);
    - a ``ROOT.TChain`` with fewer than 5 files and fewer than ``chunk_size``
      entries;
    - a plain ``ROOT.TTree`` with fewer than ``chunk_size`` entries.

    Otherwise the chain is wrapped into ``Chain``, split according to
    ``chunk_size``/``max_files`` and processed by a ``WorkManager`` running
    a ``StatVarTask``.

    Returns:
        The value of ``chain.statVar(...)`` or of ``task.results()``.
    """
    # upper edge of the entry range, capped by n_large
    last = min(n_large, first + nevents if 0 < nevents else n_large)

    # decide whether the request is small enough for sequential processing
    if 0 <= first and 0 < nevents < chunk_size:
        sequential = True
    elif isinstance(chain, ROOT.TChain):
        sequential = chain.nFiles() < 5 and len(chain) < chunk_size
    elif isinstance(chain, ROOT.TTree):
        sequential = len(chain) < chunk_size
    else:
        sequential = False

    if sequential:
        return chain.statVar(what, cuts, first, last)

    from ostap.trees.trees import Chain
    source = Chain(chain, first=first, nevents=nevents)

    job = StatVarTask(what, cuts)
    manager = WorkManager(ppservers=ppservers, silent=silent)

    pieces = source.split(chunk_size=chunk_size, max_files=max_files)
    manager.process(job, pieces)

    del pieces
    del source

    return job.results()
Ejemplo n.º 11
0
def parallel_fill(chain,
                  selector,
                  nevents=-1,
                  first=0,
                  shortcut=True,  ## important
                  chunk_size=1000000,  ## important
                  max_files=5,
                  use_frame=20000,  ## important
                  silent=False,
                  job_chunk=-1,
                  **kwargs):
    """Fill the selector's dataset from a long chain/tree in parallel.

    >>> chain    = ...
    >>> selector =  ...
    >>> chain.pprocess ( selector )

    The chain is wrapped into ``Chain``, split according to
    ``chunk_size``/``max_files`` and processed by a ``WorkManager`` running
    a ``FillTask``.  The resulting dataset and statistics are stored in
    ``selector.data`` and ``selector.stat`` and a summary is logged.

    When the whole chain is requested, the selector is "really trivial"
    without extra cuts and the chain has more than one file, ``chunk_size``
    is reset to -1.

    Parameters:
        chain: tree or chain to be processed.
        selector: must be a ``SelectorWithVars`` instance (checked with
            ``assert``).
        nevents, first: only enter the "whole chain" test; the ``Chain``
            wrapper is created without them.
        shortcut: accepted but not used in this function body.
        chunk_size, max_files: passed to ``Chain.split``.
        use_frame: forwarded to ``FillTask``.
        silent: passed to the work manager.
        job_chunk: passed to ``WorkManager.process`` as ``chunk_size``.
        **kwargs: extra keyword arguments for the ``WorkManager``.

    Returns:
        The pair ``(dataset, stat)`` obtained from ``task.results()``.
    """
    import ostap.fitting.roofit
    from ostap.fitting.pyselectors import SelectorWithVars
    from ostap.trees.trees import Chain

    assert isinstance(selector, SelectorWithVars), \
        "Invalid type of ``selector'': %s" % type(selector)

    source = Chain(chain)

    selection = selector.selection
    variables = selector.variables
    roo_cuts = selector.roo_cuts
    trivial = selector.really_trivial and not selector.morecuts

    # is the complete chain requested?
    whole_chain = 0 == first and (0 > nevents or len(chain) <= nevents)

    if whole_chain and trivial and 1 < len(source.files):
        logger.info(
            "Configuration is ``trivial'': redefine ``chunk-size'' to -1")
        chunk_size = -1

    job = FillTask(variables=variables,
                   selection=selection,
                   roo_cuts=roo_cuts,
                   trivial=trivial,
                   use_frame=use_frame)

    manager = WorkManager(silent=silent, **kwargs)
    pieces = source.split(chunk_size=chunk_size, max_files=max_files)
    manager.process(job, pieces, chunk_size=job_chunk)
    del pieces

    dataset, stat = job.results()

    # publish the outcome through the selector
    selector.data = dataset
    selector.stat = stat

    from ostap.logger.logger import attention
    skipped = 'Skipped:%d' % stat.skipped
    if stat.skipped:
        skipped = '/' + attention(skipped)
    else:
        skipped = ''

    summary = 'Selector(%s): Events Processed:%d/Total:%d%s CUTS: "%s"\n%s' % (
        selector.name, stat.processed, stat.total, skipped, selector.cuts(),
        dataset.table(prefix='# '))
    logger.info(summary)

    return dataset, stat
Ejemplo n.º 12
0
def reduce(chain,
           selection={},
           save_vars=(),
           new_vars={},
           no_vars=(),
           output='',
           name='',
           addselvars=False,
           silent=False,
           **kwargs):
    """Reduce (filter/transform) a long chain in parallel, one file per job.

    >>> chain   = ...
    >>> result  = reduce ( chain , selection = ... , save_vars = ... )

    A ``ROOT.TChain`` with at most one file is handed to ``chain.reduce``.
    Otherwise the chain is wrapped into ``Chain``, split into one-file
    pieces and processed by a ``WorkManager`` running a ``ReduceTask``.
    If ``output`` is given, the per-file results are merged into that single
    file with ``ReduceTree``.

    Parameters:
        chain: the chain to be reduced.
        selection, save_vars, new_vars, addselvars, name: forwarded to
            ``chain.reduce`` or ``ReduceTask``.  The mutable defaults are
            never modified in this function.
        no_vars: forwarded to ``chain.reduce`` only.
            NOTE(review): it is not passed to ``ReduceTask`` - confirm
            whether the parallel path should honour it.
        output: name of the merged output file; empty means "do not merge".
        silent: passed to the work manager; when false the report table and
            the reduction factor are logged.
        **kwargs: extra keyword arguments for the ``WorkManager``.

    Returns:
        The result of ``chain.reduce`` (sequential path), or the object
        returned by ``task.results()`` / a ``Chain`` over the merged tree.
    """

    from ostap.trees.trees import Chain
    from ostap.frames.tree_reduce import ReduceTree

    if isinstance(chain, ROOT.TChain) and 1 >= len(chain.files()):
        return chain.reduce(selection=selection,
                            save_vars=save_vars,
                            new_vars=new_vars,
                            no_vars=no_vars,
                            output=output,
                            name=name,
                            addselvars=addselvars,
                            silent=silent)

    # size of the input, used for the final report
    nb0 = len(chain.branches())
    ne0 = len(chain)

    ch = Chain(chain)

    task = ReduceTask(selection=selection,
                      save_vars=save_vars,
                      new_vars=new_vars,
                      addselvars=addselvars,
                      name=name)

    wmgr = WorkManager(silent=silent, **kwargs)
    trees = ch.split(max_files=1)
    wmgr.process(task, trees)

    result, table = task.results()
    # register every per-job file in the result's ``trash`` collection
    # (presumably so that these intermediate files get cleaned up - confirm)
    for i in result.files:
        result.trash.add(i)

    if output:  ## merge results into single output file
        reduced = ReduceTree(result.chain,
                             selection='',
                             save_vars=(),
                             addselvars=False,
                             silent=True,
                             output=output,
                             name=name)

        result = Chain(reduced.chain)

    if not silent:
        from ostap.frames.frames import report_print_table
        title = 'Tree -> Frame -> Tree filter/transformation'
        logger.info('Reduce tree:\n%s' %
                    report_print_table(table, title, '# '))

        nb = len(result.chain.branches())
        ne = len(result.chain)

        # the reduced tree may be empty (nothing passes the selection):
        # never let the report crash with ZeroDivisionError
        size_before = nb0 * ne0
        size_after = nb * ne
        if size_after:
            f = float(size_before) / size_after
        elif size_before:
            f = float('inf')
        else:
            f = float('nan')

        logger.info('reduce: (%dx%d) -> (%dx%d) %.1f (branches x entries) ' %
                    (nb0, ne0, nb, ne, f))

    return result