예제 #1
0
    def Process ( self, entry ):
        """Process a single entry of the tree/chain and fill the dataset

        - entry : index of the entry, it is loaded via ``GetEntry''

        Returns 0 when the entry cannot be loaded,
        otherwise the result of ``fill'' for the current chain
        """
        ## nothing to do if the entry cannot be loaded
        if self.GetEntry ( entry ) <= 0 :
            return 0                                            ## RETURN

        ## the first call in non-silent mode: count entries and create the progress bar
        if not ( self.__progress or self.__silence ) :
            self.__stat[0] = self.fChain.GetEntries()
            message = "Selector(%s): processing TChain('%s') #entries: %d" % ( self.name , self.fChain.GetName() , self.total )
            self.__logger.info ( message )
            from ostap.utils.progress_bar import ProgressBar
            self.__progress = ProgressBar ( max_value = self.total     ,
                                            silent    = self.__silence )

        ## refresh the progress bar only from time to time
        if not self.__silence and ( 0 == self.processed % 1000 or
                                    0 == entry          % 1000 or
                                    0 == self.event()   % 1000 ) :
            self.__progress.update_amount ( self.event () )

        ## one more processed entry
        self.__stat[1] += 1

        ## the actual filling is delegated to ``fill''
        return self.fill ( self.fChain )
예제 #2
0
    def process(self, task, items, timeout=90000):
        """Run ``task'' over ``items'' in parallel and merge the outcome into the task

        - task    : ``Task'' instance
        - items   : sequence of work items, one job per item
        - timeout : passed to ``get'' of the asynchronous result (``multicore'' mode only)

        In ``cluster'' mode the jobs are submitted to ``self.server'',
        in ``multicore'' mode they are mapped via ``self.pool''.
        Raises TypeError if ``task'' is not a ``Task'' instance.
        """
        if not isinstance(task, Task):
            raise TypeError("task argument needs to be an 'Task' instance")

        ## local initialization of the task
        task.initializeLocal()

        if self.mode == 'cluster':

            ## submit all jobs first ...
            submitted = [
                self.server.submit(_prefunction, (_ppfunction, task, item), (), ('GaudiMP.Parallel', 'time'))
                for item in items
            ]

            ## ... then collect the results one by one
            bar_conf = dict(max_value=len(items), description="# Job execution:", silent=self.silent)
            with ProgressBar(**bar_conf) as bar:
                for one_job in submitted:
                    result, stat = one_job()
                    task._mergeResults(result)
                    self._mergeStatistics(stat)
                    bar += 1

            self._printStatistics()
            self.server.print_stats()

        elif self.mode == 'multicore':

            t_start = time.time()
            pending = self.pool.map_async(_ppfunction, zip([task for _ in items], items))

            bar_conf = dict(max_value=len(items), description="# Job execution:", silent=self.silent)
            with ProgressBar(**bar_conf) as bar:
                ## ``get'' blocks until all results are available (or timeout)
                for result, stat in pending.get(timeout):
                    task._mergeResults(result)
                    self._mergeStatistics(stat)
                    bar += 1

            t_end = time.time()
            if not self.silent:
                self._printStatistics()
                logger.info('Time elapsed since server creation %f' % (t_end - t_start))

        ## local finalization of the task
        task.finalize()
예제 #3
0
파일: mp_gaudi.py 프로젝트: bopopescu/ostap
    def process(self, task, items, timeout=90000):
        """Run ``task'' over ``items'' using the pool and return the merged results

        - task    : ``Task'' instance
        - items   : sequence of work items, one job per item
        - timeout : passed to ``get'' of the asynchronous result

        Returns ``task.results()''.
        Raises TypeError if ``task'' is not a ``Task'' instance.
        """
        if not isinstance(task, Task):
            raise TypeError("task argument needs to be an 'Task' instance")

        ## local initialization of the task
        task.initialize_local()

        from itertools import repeat, count

        t_start = time.time()

        ## each job gets the triplet (task, job-index, item)
        pending = self.pool.map_async(_ppfunction, zip(repeat(task), count(), items))

        bar_conf = dict(max_value=len(items), description="# Job execution:", silent=self.silent)
        with ProgressBar(**bar_conf) as bar:
            ## ``get'' blocks until all results are available (or timeout)
            for result, stat in pending.get(timeout):
                task.merge_results(result)
                self.stats += stat
                bar += 1

        t_end = time.time()
        if not self.silent:
            self.print_statistics()
            logger.info('Time elapsed since server creation %f' % (t_end - t_start))

        ## local finalization of the task
        task.finalize()
        return task.results()
예제 #4
0
    def process(self, task, items, timeout=90000):
        """Run ``task'' over ``items'' using ``self.pool'' and merge the outcome into the task

        - task    : ``Task'' instance
        - items   : sequence of work items, one job per item
        - timeout : not used by this implementation

        Both the ``cluster'' and ``multicore'' modes map the jobs via ``self.pool.uimap'',
        they differ only in the statistics printed afterwards.
        Raises TypeError if ``task'' is not a ``Task'' instance.
        """
        if not isinstance(task, Task):
            raise TypeError("task argument needs to be an 'Task' instance")

        ## local initialization of the task
        task.initializeLocal()

        def _run_and_merge():
            """Map the jobs and merge results/statistics in the order they arrive"""
            from ostap.utils.progress_bar import ProgressBar
            with ProgressBar(max_value=len(items), silent=self.silent) as bar:
                jobs = self.pool.uimap(_ppfunction, zip([task for _ in items], items))
                for result, stat in jobs:
                    bar += 1
                    task._mergeResults(result)
                    self._mergeStatistics(stat)

        if self.mode == 'cluster':

            _run_and_merge()
            self._printStatistics()
            self.pp_stats()

        elif self.mode == 'multicore':

            t_start = time.time()
            _run_and_merge()
            t_end = time.time()

            self._printStatistics()
            logger.info('Time elapsed since server creation %f' % (t_end - t_start))

        ## local finalization of the task
        task.finalize()
예제 #5
0
    def __process_task  ( self , task , chunks , **kwargs ) :
        """Helper internal method: process the task over the chunks of data

        - task   : the task, it is initialized, fed with results and finalized here
        - chunks : list of chunks (sequences of items); it is consumed (emptied) in place
        - kwargs : accepted but not used here

        Returns ``task.results()''
        """
        from timeit import default_timer as _timer
        t_start = _timer()

        ## initialize the task
        task.initialize_local ()

        ## mergers for the job statistics and for the statistics from ``get_pp_stat''
        job_stat = StatMerger ()
        pp_merged = StatMerger ()

        ## job index of the first item in the current chunk
        first_id = 0

        ## total number of jobs (must be counted before the chunks are consumed)
        total = sum ( map ( len , chunks ) )

        from ostap.utils.progress_bar import ProgressBar
        with ProgressBar ( max_value = total , silent = self.silent ) as bar :

            while chunks :

                ## take the chunks in the original order
                chunk = chunks.pop ( 0 )

                ## each job gets the triplet (task, job-index, item)
                arguments = zip ( repeat ( task ) , count ( first_id ) , chunk )

                for jobid , result , stat in self.iexecute ( task_executor    ,
                                                             arguments        ,
                                                             progress = False ) :
                    job_stat += stat                         ## merge statistics
                    task.merge_results ( result , jobid )    ## merge/collect results
                    bar += 1

                first_id += len ( chunk )

                ## the statistics is merged only when it is available
                pp_stat = self.get_pp_stat()
                if pp_stat : pp_merged += pp_stat

        ## finalize the task
        task.finalize ()
        self.print_statistics ( pp_merged , job_stat , _timer() - t_start )

        return task.results ()
예제 #6
0
    def __process(self, task, chunks, **kwargs):
        """Helper internal method to process the task with chunks of data

        - task   : either a ``Task'' instance (delegated to ``__process_task'')
                   or any other object that is passed to ``func_executor''
        - chunks : list of chunks (sequences of items); it is consumed (emptied) in place
        - kwargs : ``merger'' - the object used to accumulate the results via ``+='',
                   a fresh ``TaskMerger'' by default; it is ignored for ``Task'' instances

        Returns ``task.results()'' for ``Task'' instances, ``merger.results'' otherwise
        """

        from timeit import default_timer as _timer
        start = _timer()

        ## the true tasks are processed by the dedicated method
        if isinstance(task, Task):
            kwargs.pop('merger', None)
            return self.__process_task(task, chunks, **kwargs)

        ## mergers for statistics
        merged_stat = StatMerger()
        merged_stat_pp = StatMerger()
        merger = kwargs.pop('merger', TaskMerger())

        ## total number of jobs (must be counted before the chunks are consumed)
        njobs = sum(len(c) for c in chunks)
        from ostap.utils.progress_bar import ProgressBar
        with ProgressBar(max_value=njobs, silent=self.silent) as bar:

            while chunks:

                chunk = chunks.pop()

                from itertools import repeat, count
                jobs_args = zip(repeat(task), count(), chunk)

                ## the pool is restarted for every chunk and closed afterwards
                self.pool.restart(True)
                jobs = self.pool.uimap(func_executor, jobs_args)
                del jobs_args

                for result, stat in jobs:
                    bar += 1
                    merged_stat += stat
                    merger += result

                    ## drop the references as early as possible
                    del result
                    del stat

                merged_stat_pp += self.get_pp_stat()
                self.pool.close()
                self.pool.join()

        ## there is nothing to finalize for this branch: ``Task'' instances have
        ## returned above, and the former ``what.finalize()'' call referred to
        ## a name that is not defined in this method
        self.print_statistics(merged_stat_pp, merged_stat, _timer() - start)
        ##
        return merger.results
예제 #7
0
    def add_files(self, files):
        """Add files/patterns to the data collector

        - files : a single name or a sequence of names

        Each name is passed to ``treatFile''; the loop stops as soon as
        a positive ``maxfiles'' limit is reached.
        """
        ## a single name is treated as one-element list
        if isinstance(files, str):
            files = [files]

        from ostap.utils.progress_bar import ProgressBar
        with ProgressBar(max_value=len(files), silent=self.silent) as bar:
            self.progress = bar
            for fname in files:
                ## non-positive ``maxfiles'' means no limit
                if 0 >= self.maxfiles or len(self.files) < self.maxfiles:
                    self.treatFile(fname)
                    continue
                logger.debug('Maxfiles limit is reached %s ' % self.maxfiles)
                break
예제 #8
0
파일: trees.py 프로젝트: mazurov/ostap
def _iter_cuts_ ( self , cuts , first = 0 , last = _large , progress = False ) :
    """Iterator over ``good events'' in TTree/TChain:
    
    >>> tree = ... # get the tree
    >>> for i in tree.withCuts ( 'pt>5' ) : print ( i.y )
    
    - cuts     : the selection formula
    - first    : the first entry of the range
    - last     : the end of the range, it is limited by the length of the tree
    - progress : show the progress bar?
    
    Raises TypeError for invalid formula
    
    Attention: TTree::GetEntry is already invoked for accepted events,
    no need in second call 
    """
    #
    last = min ( last , len ( self )  )
    
    pit = cpp.Ostap.PyIterator ( self , cuts , first , last )
    if not pit.ok() : raise TypeError ( "Invalid Formula: %s" % cuts )
    #
    from ostap.utils.progress_bar import ProgressBar 
    with ProgressBar ( min_value = first        ,
                       max_value = last         ,
                       silent    = not progress ) as bar :
        
        ## scale factor used to throttle the updates of the progress bar;
        ## the empty range (e.g. empty tree) must not cause division by zero 
        span = last - first 
        step = ( 13.0 * max ( bar.width , 101 ) / span ) if span else 0.0 
        
        _t = pit.tree()
        _o = _t 
        while valid_pointer ( _t ) :

            yield _t
            _t      = pit.next()             ## advance to the next entry  

            if progress : 
                current = pit.current() - 1  ## get the current entry index 
                ## update the bar at the end, when the tree pointer changes,
                ## near the edges of the range and from time to time in between
                if not _t                          \
                       or  _t != _o                \
                       or current - first   < 120  \
                       or last    - current < 120  \
                       or 0 == current % 100000    \
                       or 0 == int ( step * ( current - first ) ) % 5  :
                    
                    ## show progress bar 
                    bar.update_amount( current )
                    _o = _t
                    
        if progress : bar.update_amount( last ) 

    ## this cleanup is executed only when the iterator is exhausted 
    del pit
    self.GetEntry(0)
예제 #9
0
    def __process_task(self, task, chunks, **kwargs):
        """Helper internal method: process the ``Task'' instance over the chunks of data

        - task   : ``Task'' instance, it is initialized, fed with results and finalized here
        - chunks : list of chunks (sequences of items); it is consumed (emptied) in place
        - kwargs : accepted but not used here

        Returns ``task.results()''
        """
        assert isinstance(task, Task), 'Invalid task type  %s' % type(task)

        from timeit import default_timer as _timer
        t_start = _timer()

        ## initialize the task
        task.initialize_local()

        ## mergers for the job statistics and for the statistics from ``get_pp_stat''
        job_stat = StatMerger()
        pp_merged = StatMerger()

        ## total number of jobs (must be counted before the chunks are consumed)
        total = sum(map(len, chunks))

        from ostap.utils.progress_bar import ProgressBar
        from itertools import repeat, count
        with ProgressBar(max_value=total, silent=self.silent) as bar:

            while chunks:

                ## the chunks are taken from the end of the list
                chunk = chunks.pop()
                arguments = zip(repeat(task), count(), chunk)

                ## the pool is restarted for every chunk and closed afterwards
                self.pool.restart(True)
                jobs = self.pool.uimap(task_executor, arguments)
                del arguments

                for result, stat in jobs:
                    bar += 1
                    job_stat += stat
                    task.merge_results(result)

                    ## drop the references as early as possible
                    del result, stat

                pp_merged += self.get_pp_stat()
                self.pool.close()
                self.pool.join()

        ## finalize the task
        task.finalize()
        self.print_statistics(pp_merged, job_stat, _timer() - t_start)

        return task.results()
예제 #10
0
    def add_files ( self , patterns ) :
        """Add files/patterns to the data collector

        - patterns : a single pattern or a sequence of patterns

        A pattern that contains one of the ``protocols'' is taken as it is,
        all other patterns are expanded with ``glob.iglob''.
        Names that are already in ``self.files'' are ignored, the new ones are
        passed to ``treatFile'' until a positive ``maxfiles'' limit is reached.
        """
        ## a single pattern is treated as one-element list
        if isinstance ( patterns , str ) : patterns = [ patterns ]

        ## collect new (not yet known) files
        new_files = set ()
        for pattern in patterns :

            ## the pattern with a protocol: no globbing
            if any ( p in pattern for p in protocols ) :
                if pattern not in self.files : new_files.add ( pattern )
                continue

            ## the regular pattern: expand it
            new_files.update ( f for f in glob.iglob ( pattern ) if f not in self.files )

        if not self.silent :
            logger.info ('Loading: %s  #patterns/files: %s/%d' % ( self.description  ,
                                                                   len ( patterns )  ,
                                                                   len ( new_files ) ) )
        ## update list of patterns
        self.patterns += patterns

        from ostap.utils.progress_bar import ProgressBar
        with ProgressBar ( max_value = len ( new_files ) , silent = self.silent ) as bar :
            self.progress = bar
            for fname in new_files :
                ## non-positive ``maxfiles'' means no limit
                if 0 >= self.maxfiles or len ( self.files ) < self.maxfiles :
                    self.treatFile ( fname )
                    continue
                logger.debug ('Maxfiles limit is reached %s ' % self.maxfiles )
                break

        if not self.silent :
            logger.info ('Loaded: %s' % self )
예제 #11
0
    def add_files(self, files):
        """Add files/patterns to the data collector

        - files : a single name or a sequence of names

        Duplicates are removed and the names are sorted; each name is passed to
        ``treatFile'' until a positive ``maxfiles'' limit is reached.
        """
        ## a single name is treated as one-element list
        if isinstance(files, str):
            files = [files]

        ## eliminate duplicates and sort
        files = sorted(set(files))

        ## the size of progress bar: the number of files, limited by positive ``maxfiles''
        nfiles = len(files)
        if 0 >= self.maxfiles:
            max_value = nfiles
        else:
            max_value = min(nfiles, self.maxfiles)

        from ostap.utils.progress_bar import ProgressBar
        with ProgressBar(max_value=max_value, silent=self.silent) as bar:
            self.progress = bar
            for fname in files:
                ## non-positive ``maxfiles'' means no limit
                if 0 >= self.maxfiles or len(self.files) < self.maxfiles:
                    self.treatFile(fname)
                    continue
                logger.debug('Maxfiles limit is reached %s ' % self.maxfiles)
                break
예제 #12
0
파일: trees.py 프로젝트: mazurov/ostap
def _tc_call_ ( self , first = 0 , last = -1  , cuts = None , progress = False ) :
    """Iterator over ``good events'' in TTree/TChain:
    
    >>> tree = ... # get the tree
    >>> for i in tree(0, 100 , 'pt>5' ) : print ( i.y )
    
    - first    : the first entry of the range
    - last     : the end of the range, it is limited by the length of the tree;
                 negative value means ``up to the end of the tree''
    - cuts     : optional selection formula
    - progress : show the progress bar?
    
    Raises TypeError for invalid formula
    """
    #
    ## negative ``last'' stands for the whole tree. Since ``last'' is limited
    ## by the length of the tree anyway, there is no need for an artificial
    ## ``very large'' number here
    nentries = len ( self )
    last     = nentries if last < 0 else min ( last , nentries )

    from ostap.utils.progress_bar import ProgressBar 
    with ProgressBar ( min_value = first        ,
                       max_value = last         ,
                       silent    = not progress ) as bar :
        
        ## scale factor used to throttle the updates of the progress bar;
        ## the empty range (e.g. empty tree) must not cause division by zero 
        span = last - first 
        step = ( 13.0 * max ( bar.width , 101 ) / span ) if span else 0.0 

        pit = 1   ## placeholder: makes the final ``del pit'' valid when there are no cuts 
        if cuts :
            
            pit = cpp.Ostap.PyIterator ( self , cuts , first , last )
            if not pit.ok() : raise TypeError ( "Invalid Formula: %s" % cuts )
            #
            
            _t = pit.tree()
            _o = _t 
            while valid_pointer ( _t ) :
                
                yield _t                         ## YIELD 
                _t      = pit.next()             ## advance to the next entry  
                
                if progress : 
                    current = pit.current() - 1  ## get the current entry index 
                    ## update the bar at the end, when the tree pointer changes,
                    ## near the edges of the range and from time to time in between
                    if not _t                          \
                           or  _t != _o                \
                           or current - first   < 120  \
                           or last    - current < 120  \
                           or 0 == current % 100000    \
                           or 0 == int ( step * ( current - first ) ) % 5  :
                        
                        ## show progress bar 
                        bar.update_amount( current )
                        _o = _t
        else :
            
            ## just explicit loop 
            ## NOTE(review): the range includes the entry ``last'' itself; at the
            ## end of the tree the loop is terminated by the failing ``GetEntry''.
            ## Check if the inclusive upper edge is intended - TODO confirm
            for current in range ( first , last + 1 ) :
                
                if progress :
                    if     current - first   < 120  \
                           or last - current < 120  \
                           or 0 == current % 100000 \
                           or 0 == int ( step * ( current - first ) ) % 5  :
                        
                        bar.update_amount( current )
                        
                if 0 >= self.GetEntry ( current ) : break
                yield self                         ## YIELD! 
                
                    
        if progress : bar.update_amount( last ) 

    ## this cleanup is executed only when the iterator is exhausted 
    del pit
    self.GetEntry(0)
예제 #13
0
class SelectorWithVars(SelectorWithCuts) :
    """Create and fill the basic dataset for RooFit
    
    - Define the list of ``variables'' for selector:
    
    >>> variables = [ ... ]
    
    Add a variable 'my_name1' from the tree/chain:
    
    >>> variables += [ # name       descriptor         min-value , max-value  
    ...    Variable ( 'my_name1' , 'my_description1' , low       , high     ) ]
    
    Get a variable 'my_name' from the tree/chain using the accessor function, e.g. rescale it on-fligh:
    
    >>> variables += [ #  name       descriptor        min-value , max-value , access function   
    ...    Variable ( 'my_name2' , 'my_description2' , low       , high      , lambda s : s.my_name2/1000 ) ]
    
    Use less trivial expression:
    
    >>> variables += [ #  name       descriptor        min-value , max-value , access function   
    ...    Variable ( 'my_name3' , 'my_description3' , low       , high      , lambda s : s.var1+s.var2 ) ]
    
    Any callable that gets TChain/Tree and evaluates to double.
    ( useful case - e.g. it could be TMVAReader)
    
    >>> def myvar ( chain ) : ...
    >>> variables += [ #  name       descriptor        min-value , max-value , access function   
    ...    Variable ( 'my_name4' , 'my_description4' , low       , high      , myvar )  ]

    Use already booked variables:
    
    >>> v5 = ROOT.RooRealVal( 'my_name5' )
    >>> variables += [  Variable ( v5 , accessor = lambda s : s.var5 ) ]

    Add already booked variables:
    
    >>> v6 = ROOT.RooRealVal( 'my_name6' )
    >>> variables += [  Variable ( v6 ) ] ## get variable 'my_name6'

    - Finally create selector

    >>> selector = SelectorWithVars (
    ...       variables                             ,
    ...       selection = ' chi2vx<30 && pt>2*GeV ' ) ## filtering

    - Use selector to fill RooDataSet 
    >>> tree  = ...
    >>> chain.process ( selector )

    - Get dataset  from the selector 
    >>> dataset = selector.data   
    """
    ## constructor 
    def __init__ ( self                           ,
                   variables                      ,  ## list of variables  
                   selection                      ,  ## Tree-selection 
                   cuts         = None            ,
                   name         = ''              ,
                   fullname     = ''              ,
                   silence      = False           ) :
        """Create the selector and book the (empty) dataset

        - variables : non-empty list of variables; each item is either a ``Variable''
                      or a string, ROOT.RooAbsReal, tuple/list or dict that
                      is converted to ``Variable''
        - selection : the tree-selection, it is passed to the base class
        - cuts      : optional python cuts, stored and exposed as ``morecuts''
        - name      : the name of selector/dataset, generated via ``dsID'' if empty
        - fullname  : the second argument for ROOT.RooDataSet, ``name'' if empty
        - silence   : suppress the log messages and the progress bar?
        """
        
        if not     name :
            from   ostap.core.core import dsID 
            name = dsID()
            
        if not fullname : fullname = name 

        self.__name = name 
        #
        ## create the logger 
        #
        from ostap.logger.logger  import getLogger
        self.__logger = logger ## getLogger ( fullname ) 
        #
        self.__silence  = silence

        ##
        assert 0 < len(variables) , "Empty list of variables"
        #
        ## instantiate the base class
        # 
        SelectorWithCuts.__init__ ( self , selection ) ## initialize the base

        self.__cuts      = cuts
        self.__variables = [] 
        self.__varset    = ROOT.RooArgSet() 

        self.__triv_vars = True
        for v in variables :

            ## convert the specification into the proper ``Variable''
            vv = v 
            if   isinstance ( v , str              ) : vv = Variable (   v ) 
            elif isinstance ( v , ROOT.RooAbsReal  ) : vv = Variable (   v )
            elif isinstance ( v , ( tuple , list ) ) : vv = Variable (  *v )
            elif isinstance ( v , dict             ) : vv = Variable ( **v )
            elif isinstance ( v , Variable         ) : vv = v  

            assert isinstance  ( vv , Variable ), 'Invalid variable %s/%s' % ( vv , type ( vv ) )

            self.__variables.append ( vv     )
            self.__varset   .add    ( vv.var )
            #
            ## the check must use the converted variable ``vv'': the raw input ``v''
            ## can be a string/tuple/dict that has no such attributes 
            if   vv.trivial and vv.name == vv.formula : pass
            elif vv.formula                           : pass
            else                                      : self.__triv_vars = False
            
        self.__variables = tuple( self.__variables ) 

        self.__triv_sel  = valid_formula ( selection , self.__varset ) 
        triv_cuts        = not cuts
        
        ## fast processing requires trivial variables, trivial selection and no python cuts 
        self.__trivial = self.__triv_vars and self.__triv_sel and triv_cuts
        if not silence :
            tv = allright ( 'True' )  if  self.__triv_vars else  attention ( 'False' )
            ts = allright ( 'True' )  if  self.__triv_sel  else  attention ( 'False' )
            tc = allright ( 'True' )  if  triv_cuts        else  attention ( 'False' )
            self.__logger.info ( "Suitable for fast processing: variables:%s, selection:%s, py-cuts:%s" % ( tv , ts , tc ) )
            
        ## print the table of booked variables 
        if not self.__silence: 
            nl = 0
            dl = 0 
            for v in self.__variables :
                nl = max ( nl , len( v.name        ) ) 
                dl = max ( dl , len( v.description ) )                 
            dl = max ( dl , len ( 'Description' ) + 2 ) 
            nl = max ( nl , len ( 'Variable'    ) + 2 ) 
        
            line1    = '\n# | %%%ds | %%-%ds |         min / max         | Trivial? | ' % ( nl , dl ) 
            line2    = '\n# | %%%ds | %%-%ds | %%+11.3g / %%-+11.3g | %%s | ' % ( nl , dl )         
            the_line = 'Booked %d  variables:' % len ( self.variables ) 
            sep      = '\n# +%s+%s+%s+%s+' % ( (nl+2)*'-' , (dl+2)*'-' , 27*'-', 10*'-' )
            the_line += sep 
            the_line += line1 % ( 'Variable' , 'Description' )
            the_line += sep
            for v in self.__variables :
                trivial = allright ('True') + 4* ' ' if v.trivial else attention ( 'False' ) + 3 * ' '
                    
                fmt = line2 % ( v.name        , 
                                v.description ,
                                v.minmax[0]   ,
                                v.minmax[1]   ,
                                trivial       )
                the_line += fmt
            the_line += sep 
            self.__logger.info ( the_line )
            
        ## Book dataset
        self.__data = ROOT.RooDataSet (
            ##
            self.name ,
            fullname  , 
            ##
            self.__varset
            )
        
        #
        ## it is still very puzzling for me: should this line be here at all??
        ROOT.SetOwnership ( self.__data  , False )
        
        self.__progress = None 
        from collections import defaultdict
        self.__skip     = defaultdict(int)   ## number of skipped entries per variable 
        self.__notifier = None
        self.__stat    = [ 0 , 0 , 0 ]       ## total/processed/skipped 

    @property 
    def name ( self ) :
        """``name'' : the name of the selector, it is also used as the name of the dataset"""
        return self.__name 
    
    @property 
    def data ( self ) :
        """``data'' : the dataset (can be replaced by another ROOT.RooAbsData object)"""
        return self.__data
    @data.setter
    def data ( self , dataset ) :
        ## only the true datasets are accepted
        assert isinstance ( dataset , ROOT.RooAbsData ), \
               "Incorrect type of data %s/%s " % ( dataset ,   type ( dataset ) )
        message = "Selector(%s), add dataset %s" % (  self.__name , dataset )
        self.__logger.debug ( message )
        self.__data = dataset 

    @property 
    def variables ( self ) :
        """``variables'' : the tuple of booked variables (cleared in Terminate)"""
        return self.__variables

    @property
    def varset ( self ) :
        """``varset'' : the set of variables that defines the structure of RooDataSet"""
        return self.__varset
    
    @property
    def morecuts ( self ) :
        """``morecuts'' : additional cuts to be applied in selection"""
        return self.__cuts

    @property
    def trivial_vars( self ) :
        """``trivial_vars'' : are all the variables ``trivial'', i.e. suitable for fast-processing?"""
        return self.__triv_vars
    
    @property
    def trivial_sel( self ) :
        """``trivial_sel'' : is the selection ``trivial'', i.e. suitable for fast-processing?"""
        return self.__triv_sel
    
    @property
    def trivial ( self ) :
        """``trivial'' : are variables, selection and cuts all ``trivial'', i.e. suitable for fast-processing?"""
        return self.__trivial

    @property
    def skip ( self ) :
        """``skip'' : dictionary with the number of skipped entries per variable"""
        return self.__skip
    
    @property
    def skipped ( self ) :
        """``skipped'' : the total number of skipped entries"""
        return self.__stat[2]
    
    @property
    def processed  ( self ) :
        """``processed'' : the number of processed events (after cuts)"""
        return self.__stat[1]
    
    @property
    def total  ( self ) :
        """``total'' : the total number of processed events (before cuts)"""
        return self.__stat[0]

    @property
    def stat ( self ) :
        """``stat'' : Total/processed/skipped events (as tuple)"""
        return tuple(self.__stat)
    @stat.setter
    def stat ( self , value  ) :
        ## all three counters are required: total, processed and skipped
        assert 3 <= len(value), 'Invalid "value":%s' % str ( value )
        ## read all items first, to avoid partially updated statistics on failure 
        total , processed , skipped = value[0] , value[1] , value[2]
        self.__stat[0] = total
        self.__stat[1] = processed
        self.__stat[2] = skipped
        
        
    ## get the dataset 
    def dataset   ( self  ) :
        """Get the dataset (the same object as the ``data'' property)"""
        return self.__data

    # =========================================================================
    ## the only one actually important method 
    def Process ( self, entry ):
        """Process a single entry of the tree/chain and fill the dataset

        - entry : index of the entry, it is loaded via ``GetEntry''

        Returns 0 when the entry cannot be loaded,
        otherwise the result of ``fill'' for the current chain
        """
        ## nothing to do if the entry cannot be loaded
        if self.GetEntry ( entry ) <= 0 :
            return 0                                            ## RETURN

        ## the first call in non-silent mode: count entries and create the progress bar
        if not ( self.__progress or self.__silence ) :
            self.__stat[0] = self.fChain.GetEntries()
            message = "Selector(%s): processing TChain('%s') #entries: %d" % ( self.name , self.fChain.GetName() , self.total )
            self.__logger.info ( message )
            from ostap.utils.progress_bar import ProgressBar
            self.__progress = ProgressBar ( max_value = self.total     ,
                                            silent    = self.__silence )

        ## refresh the progress bar only from time to time
        if not self.__silence and ( 0 == self.processed % 1000 or
                                    0 == entry          % 1000 or
                                    0 == self.event()   % 1000 ) :
            self.__progress.update_amount ( self.event () )

        ## one more processed entry
        self.__stat[1] += 1

        ## the actual filling is delegated to ``fill''
        return self.fill ( self.fChain )

    # =========================================================================
    ## fill it! 
    def fill ( self , bamboo ) :
        """Fill the dataset from the given ``bamboo''

        The method does not depend on TTree/TChain and can be used directly,
        provided that the ``accessor functions'' of the variables and the ``cuts''
        agree with the type of ``bamboo''

        Returns 1 if the entry is added to the dataset and 0 if it is rejected
        by the cuts or if some variable is out of its range
        """
        ## python-cuts (if any)
        cuts = self.__cuts
        if cuts and not cuts ( bamboo ) : return 0

        ## evaluate all variables
        for item in self.__variables :

            roovar     = item.var            ## the variable
            low , high = item.minmax         ## the allowed range
            getter     = item.accessor       ## the accessor function

            value = getter ( bamboo )
            if low <= value <= high :
                roovar.setVal ( value )
                continue

            ## out of range: count it and skip the whole event
            self.__skip [ item.name ] += 1
            self.__stat [ 2 ]         += 1
            return 0                         ## RETURN

        ## all variables are in range: add the entry
        self.__data.add ( self.__varset )

        return 1

    # =========================================================================
    ## ``callable'' interface 
    def __call__ ( self ,  entry ) :
        """``callable'' interface to the selector: the argument is passed directly to ``fill''"""
        outcome = self.fill ( entry )
        return outcome

    ## termination 
    def Terminate ( self  ) :
        #
        if self.__progress :
            self.__progress.end() 
        #
        ## Aborted? 
        if   0 != self.GetAbort() :
            self.__logger.fatal('Selector(%s): process has been aborted!' % self.__name )

            self.__data = None 
            del self.__varset
            del self.__variables
            self.__varset     =  ()
            self.__variables  =  ()
            
            return  ## RETURN

        ##get total number of input events from base class 
        self.__stat[0] = self.event()
        
        if not self.__silence :
            skipped = 'Skipped:%d' % self.skipped
            skipped = '/' + attention ( skipped ) if self.skipped else ''
            cuts    = allright ( '"%s"' % self.cuts () ) if self.trivial_sel else attention ( '"%s"'  % self.cuts() ) 
            self.__logger.info (
                'Selector(%s): Events Total:%d/Processed:%d%s CUTS: %s' % (
                self.__name    ,
                self.total     ,
                self.processed ,
                skipped        , 
                cuts           ) )            
            self.__logger.info ( 'Selector(%s): dataset created:%s' %  ( self.__name ,  self.__data ) )
            
        if self.__data and not self.__silence :
            vars = []
            for v in self.__variables :
                s    = self.__data.statVar( v.name )
                mnmx = s.minmax ()
                mean = s.mean   ()
                rms  = s.rms    ()
                r    = ( v.name        ,                       ## 0 
                         v.description ,                       ## 1 
                         ('%+.5g' % mean.value() ).strip() ,   ## 2
                         ('%.5g'  % rms          ).strip() ,   ## 3 
                         ('%+.5g' % mnmx[0]      ).strip() ,   ## 4
                         ('%+.5g' % mnmx[1]      ).strip() )   ## 5
                s = self.__skip [ v.name] 
                if s : skip = '%-d' % s
                else : skip = '' 
                r +=  skip,                                    ## 6 
                vars.append ( r )

            vars.sort()
            
            name_l  = len ( 'Variable'    ) + 2 
            desc_l  = len ( 'Description' ) + 2 
            mean_l  = len ( 'mean' ) + 2 
            rms_l   = len ( 'rms'  ) + 2
            min_l   = len ( 'min'  ) + 2 
            max_l   = len ( 'max'  ) + 2 
            skip_l  = len ( 'Skip' ) 
            for v in vars :
                name_l = max ( name_l , len ( v[0] ) )
                desc_l = max ( desc_l , len ( v[1] ) )
                mean_l = max ( mean_l , len ( v[2] ) )
                rms_l  = max ( rms_l  , len ( v[3] ) )
                min_l  = max ( min_l  , len ( v[4] ) )
                max_l  = max ( max_l  , len ( v[5] ) )
                skip_l = max ( skip_l , len ( v[6] ) )

            sep      = '# -%s+%s+%s+%s+%s-' % ( ( name_l       + 2 ) * '-' ,
                                                ( desc_l       + 2 ) * '-' ,
                                                ( mean_l+rms_l + 5 ) * '-' ,
                                                ( min_l +max_l + 5 ) * '-' ,
                                                ( skip_l       + 2 ) * '-' )
            fmt = '#   %%%ds | %%-%ds | %%%ds / %%-%ds | %%%ds / %%-%ds | %%-%ds   '  % (
                name_l ,
                desc_l ,
                mean_l ,
                rms_l  ,
                min_l  ,
                max_l  ,
                skip_l
                )
            
            report  = 'Dataset(%s) created:' % self.__name
            report += ' ' + allright ( '%s entries, %s variables' %  ( len ( self.__data ) , len ( self.variables ) ) )
            if self.trivial_vars : report += ' Vars:' + allright  ('trivial'       ) + ';'
            else                 : report += ' Vars:' + attention ('non-trivial'   ) + ';'
            if self.trivial_sel  : report += ' Cuts:' + allright  ('trivial'       ) + ';'
            else                 : report += ' Cuts:' + attention ('non-trivial'   ) + ';'
            if not self.__cuts   : report += ' '      + allright  ( 'no py-cuts'   )  
            else                 : report += ' '      + attention ( 'with py-cuts' )
                                                            
            header  = fmt % ( 'Variable'    ,
                              'Description' ,
                              'mean' ,
                              'rms'  ,
                              'min'  ,
                              'max'  ,
                              'skip' )
            report += '\n' + sep
            report += '\n' + header
            report += '\n' + sep            
            for v in vars :
                line    =  fmt % ( v[0] , v[1] , v[2] , v[3] , v[4] , v[5] , attention ( v[6] ) )
                report += '\n' + line  
            report += '\n' + sep
            self.__logger.info ( report ) 
        
        if not len ( self.__data ) :
            skip = 0
            for k,v in self.__skip.iteritems() : skip += v 
            self.__logger.warning("Selector(%s): empty dataset! Total:%s/Processed:%s/Skipped:%d"
                                  % ( self.__name  , self.total , self.processed , skip ) ) 
            
        ## attention: delete these

        del self.__varset
        del self.__variables
        
        self.__varset     =  ()
        self.__variables  =  ()

    def Init ( self , chain ) :
        """Forward `Init` to `SelectorWithCuts` and refresh the progress bar

        - `chain` is handed over unchanged to `SelectorWithCuts.Init`
        - the value obtained from the base class is returned as-is
        """
        status = SelectorWithCuts.Init ( self , chain )

        ## nothing to refresh in silent mode or while no progress bar exists
        bar = self.__progress
        if bar and not self.__silence :
            bar.update_amount ( self.event () )

        return status

    def Begin ( self , tree = None ) :
        """Forward `Begin` to `SelectorWithCuts` and refresh the progress bar

        - `tree` (optional) is handed over unchanged to `SelectorWithCuts.Begin`
        - the value obtained from the base class is returned as-is
        """
        status = SelectorWithCuts.Begin ( self , tree )

        ## nothing to refresh in silent mode or while no progress bar exists
        bar = self.__progress
        if bar and not self.__silence :
            bar.update_amount ( self.event () )

        return status
    #
    def SlaveBegin ( self , tree ) :
        """Forward `SlaveBegin` to `SelectorWithCuts` and (re)attach the notifier

        After the base-class call this method:
        - refreshes the progress bar (unless silent or no bar exists)
        - stores `tree.GetEntries()` as the first element of the statistics record
        - shuts down the previous notifier (if any) and creates a fresh
          `Ostap.Utils.Notifier` for `tree`
        - registers with the notifier every variable accessor that is a `ROOT.TObject`

        The value obtained from the base class is returned as-is
        """
        status = SelectorWithCuts.SlaveBegin ( self , tree )

        bar = self.__progress
        if bar and not self.__silence :
            bar.update_amount ( self.event () )

        ## number of entries in the tree to be processed
        self.__stat[0] = tree.GetEntries()

        ## get rid of the previous notifier before the new one is created
        if self.__notifier :
            self.__notifier.exit()
            del self.__notifier

        self.__notifier = Ostap.Utils.Notifier( tree )

        ## only ROOT objects are registered with the notifier
        for var in self.__variables :
            accessor = var.accessor
            if isinstance ( accessor , ROOT.TObject ) :
                self.__notifier.add ( accessor )

        return status
    #
    def Notify ( self ) :
        """Forward `Notify` to `SelectorWithCuts` and refresh the progress bar

        The value obtained from the base class is returned as-is
        """
        status = SelectorWithCuts.Notify ( self )

        ## nothing to refresh in silent mode or while no progress bar exists
        bar = self.__progress
        if bar and not self.__silence :
            bar.update_amount ( self.event () )

        return status
            
    def SlaveTerminate ( self ) :
        """Forward `SlaveTerminate` to `SelectorWithCuts` and release the notifier

        After the base-class call the progress bar is refreshed (unless silent
        or no bar exists); an active notifier is told to `exit` and the
        attribute is reset to `None`

        The value obtained from the base class is returned as-is
        """
        status = SelectorWithCuts.SlaveTerminate ( self )

        bar = self.__progress
        if bar and not self.__silence :
            bar.update_amount ( self.event () )

        ## the notifier is not needed anymore
        if self.__notifier :
            self.__notifier.exit()
            self.__notifier = None

        return status
예제 #14
0
    def __process_func ( self , task , chunks  , **kwargs ) :
        """Internal helper: run the plain function `task` over `chunks` of data
        via `self.iexecute` and combine the per-job outputs

        Keyword arguments (wrapped into `cidict` before the lookup):
        - `init`      : starting value of the combined result (default: `None`)
        - `merger`    : invoked as `merger    ( results , result )`
        - `collector` : invoked as `collector ( results , result , jobid )`,
                        consulted only when no `merger` is given

        A warning is logged when both or none of `merger`/`collector` are given

        Attention: `chunks` is consumed, each chunk is popped from the front
        of the list right before it is processed

        Returns the combined result; it stays equal to `init` when neither
        `merger` nor `collector` is supplied
        """
        from ostap.utils.cidict import cidict
        options = cidict( kwargs )

        from timeit import default_timer as _timer
        started = _timer()

        init      = options.pop ( 'init'      , None )
        merger    = options.pop ( 'merger'    , None )
        collector = options.pop ( 'collector' , None )

        ## exactly one way of combining the results is expected
        if merger and collector :
            logger.warning ( "Both    ``merger'' and ``collector'' are specified for merging!")
        elif not ( merger or collector ) :
            logger.warning ( "Neither ``merger'' nor ``collector'' are specified for merging!")

        ## accumulators: per-job statistics and statistics reported by `get_pp_stat`
        jobs_stat = StatMerger ()
        pp_total  = StatMerger ()

        ## job identifier of the first item in the next chunk
        first_id = 0

        ## the combined result starts from the user-supplied value
        results = init

        from ostap.utils.progress_bar import ProgressBar
        ## the overall number of jobs in all chunks
        total_jobs = sum  ( len ( c ) for c in chunks )
        with ProgressBar ( max_value = total_jobs , silent = self.silent ) as bar :

            while chunks :

                current = chunks.pop ( 0 )

                ## ( task , job-id , item ) triplets; job-ids continue across the chunks
                triplets = zip ( repeat ( task ) , count ( first_id ) , current )

                ## the bar is advanced here, hence no progress reporting from `iexecute`
                outputs = self.iexecute ( func_executor , triplets , progress = False )
                for jobid , result , stat in outputs :

                    jobs_stat += stat

                    ## `merger` takes precedence over `collector`
                    if   merger    : results = merger    ( results , result )
                    elif collector : results = collector ( results , result , jobid )

                    bar += 1

                first_id += len ( current )

                pp_stat = self.get_pp_stat()
                if pp_stat : pp_total += pp_stat

        ## report the collected statistics together with the elapsed time
        elapsed = _timer() - started
        self.print_statistics ( pp_total , jobs_stat , elapsed )

        return results