Exemple #1
0
    def __init__ ( self                           ,
                   variables                      ,  ## list of variables
                   selection                      ,  ## Tree-selection
                   cuts         = None            ,
                   name         = ''              ,
                   fullname     = ''              ,
                   silence      = False           ) :
        """Book the variables and create the (still empty) output RooDataSet

        - variables : non-empty sequence of variable descriptions; each item is
                      either a `Variable` or a str / `ROOT.RooAbsReal` /
                      tuple-or-list / dict from which a `Variable` is built
        - selection : tree-selection, forwarded to `SelectorWithCuts.__init__`
        - cuts      : optional python-level cuts (stored as is)
        - name      : dataset name; generated with `dsID()` when empty
        - fullname  : dataset title; defaults to `name`
        - silence   : suppress the informational printout
        """
        if not     name :
            from   ostap.core.core import dsID
            name = dsID()

        if not fullname : fullname = name

        self.__name = name
        #
        ## create the logger
        #
        ## NB: the logger visible in the enclosing scope is used,
        ##     the dedicated per-selector logger is (deliberately?) disabled
        from ostap.logger.logger  import getLogger
        self.__logger = logger ## getLogger ( fullname )
        #
        self.__silence  = silence

        ##
        assert 0 < len(variables) , "Empty list of variables"
        #
        ## instantiate the base class
        #
        SelectorWithCuts.__init__ ( self , selection ) ## initialize the base

        self.__cuts      = cuts
        self.__variables = []
        self.__varset    = ROOT.RooArgSet()

        self.__triv_vars = True
        for v in variables :

            ## normalise every accepted input form to `Variable`
            vv = v
            if   isinstance ( v , str              ) : vv = Variable (   v )
            elif isinstance ( v , ROOT.RooAbsReal  ) : vv = Variable (   v )
            elif isinstance ( v , ( tuple , list ) ) : vv = Variable (  *v )
            elif isinstance ( v , dict             ) : vv = Variable ( **v )
            elif isinstance ( v , Variable         ) : vv = v

            assert isinstance  ( vv , Variable ), 'Invalid variable %s/%s' % ( vv , type ( vv ) )

            self.__variables.append ( vv     )
            self.__varset   .add    ( vv.var )
            #
            ## the check must use the normalised `vv`: the raw item `v`
            ## can be str/tuple/list/dict and has no such attributes
            if   vv.trivial and vv.name == vv.formula : pass
            elif vv.formula                           : pass
            else                                      : self.__triv_vars = False

        self.__variables = tuple( self.__variables )

        self.__triv_sel  = valid_formula ( selection , self.__varset )
        triv_cuts        = not cuts

        ## the selector is "trivial" only if variables, selection and cuts all are
        self.__trivial = self.__triv_vars and self.__triv_sel and triv_cuts
        if not silence :
            tv = allright ( 'True' )  if  self.__triv_vars else  attention ( 'False' )
            ts = allright ( 'True' )  if  self.__triv_sel  else  attention ( 'False' )
            tc = allright ( 'True' )  if  triv_cuts        else  attention ( 'False' )
            self.__logger.info ( "Suitable for fast processing: variables:%s, selection:%s, py-cuts:%s" % ( tv , ts , tc ) )

        if not self.__silence:
            ## column widths: the longest entry, but not narrower than the title
            nl = 0
            dl = 0
            for v in self.__variables :
                nl = max ( nl , len( v.name        ) )
                dl = max ( dl , len( v.description ) )
            dl = max ( dl , len ( 'Description' ) + 2 )
            nl = max ( nl , len ( 'Variable'    ) + 2 )

            line1    = '\n# | %%%ds | %%-%ds |         min / max         | Trivial? | ' % ( nl , dl )
            line2    = '\n# | %%%ds | %%-%ds | %%+11.3g / %%-+11.3g | %%s | ' % ( nl , dl )
            the_line = 'Booked %d  variables:' % len ( self.variables )
            sep      = '\n# +%s+%s+%s+%s+' % ( (nl+2)*'-' , (dl+2)*'-' , 27*'-', 10*'-' )
            the_line += sep
            the_line += line1 % ( 'Variable' , 'Description' )
            the_line += sep
            for v in self.__variables :
                ## explicit padding: the coloured text defeats %-width formatting
                if v.trivial : trivial = allright  ( 'True'  ) + 4 * ' '
                else         : trivial = attention ( 'False' ) + 3 * ' '

                fmt = line2 % ( v.name        ,
                                v.description ,
                                v.minmax[0]   ,
                                v.minmax[1]   ,
                                trivial       )
                the_line += fmt
            the_line += sep
            self.__logger.info ( the_line )

        ## Book dataset
        self.__data = ROOT.RooDataSet (
            ##
            self.name ,
            fullname  ,
            ##
            self.__varset
            )

        #
        ## it is still very puzzling for me: should this line be here at all??
        ROOT.SetOwnership ( self.__data  , False )

        self.__progress = None
        from collections import defaultdict
        self.__skip     = defaultdict(int)   ## per-variable counters of skipped entries
        self.__notifier = None
        self.__stat    = [ 0 , 0 , 0 ]
Exemple #2
0
    def Terminate ( self  ) :
        """Finalize the processing: close the progress bar, print the
        summary of the created dataset and release the booked variables.

        If the processing has been aborted, the dataset is dropped
        (set to None) and nothing is reported except the fatal message.
        """
        #
        if self.__progress :
            self.__progress.end()
        #
        ## Aborted?
        if   0 != self.GetAbort() :
            self.__logger.fatal('Selector(%s): process has been aborted!' % self.__name )

            self.__data = None
            del self.__varset
            del self.__variables
            self.__varset     =  ()
            self.__variables  =  ()

            return  ## RETURN

        ##get total number of input events from base class
        self.__stat[0] = self.event()

        if not self.__silence :
            skipped = 'Skipped:%d' % self.skipped
            skipped = '/' + attention ( skipped ) if self.skipped else ''
            cuts    = allright ( '"%s"' % self.cuts () ) if self.trivial_sel else attention ( '"%s"'  % self.cuts() )
            self.__logger.info (
                'Selector(%s): Events Total:%d/Processed:%d%s CUTS: %s' % (
                self.__name    ,
                self.total     ,
                self.processed ,
                skipped        ,
                cuts           ) )
            self.__logger.info ( 'Selector(%s): dataset created:%s' %  ( self.__name ,  self.__data ) )

        if self.__data and not self.__silence :
            ## one row per variable: name, description, mean, rms, min, max, #skipped
            rows = []
            for v in self.__variables :
                stat    = self.__data.statVar( v.name )
                mnmx    = stat.minmax ()
                mean    = stat.mean   ()
                rms     = stat.rms    ()
                nskip   = self.__skip [ v.name ]
                skip    = '%-d' % nskip if nskip else ''
                rows.append ( ( v.name        ,                       ## 0
                                v.description ,                       ## 1
                                ('%+.5g' % mean.value() ).strip() ,   ## 2
                                ('%.5g'  % rms          ).strip() ,   ## 3
                                ('%+.5g' % mnmx[0]      ).strip() ,   ## 4
                                ('%+.5g' % mnmx[1]      ).strip() ,   ## 5
                                skip                              ) ) ## 6

            rows.sort()

            ## column widths: the longest entry, but not narrower than the title
            name_l  = len ( 'Variable'    ) + 2
            desc_l  = len ( 'Description' ) + 2
            mean_l  = len ( 'mean' ) + 2
            rms_l   = len ( 'rms'  ) + 2
            min_l   = len ( 'min'  ) + 2
            max_l   = len ( 'max'  ) + 2
            skip_l  = len ( 'Skip' )
            for r in rows :
                name_l = max ( name_l , len ( r[0] ) )
                desc_l = max ( desc_l , len ( r[1] ) )
                mean_l = max ( mean_l , len ( r[2] ) )
                rms_l  = max ( rms_l  , len ( r[3] ) )
                min_l  = max ( min_l  , len ( r[4] ) )
                max_l  = max ( max_l  , len ( r[5] ) )
                skip_l = max ( skip_l , len ( r[6] ) )

            sep      = '# -%s+%s+%s+%s+%s-' % ( ( name_l       + 2 ) * '-' ,
                                                ( desc_l       + 2 ) * '-' ,
                                                ( mean_l+rms_l + 5 ) * '-' ,
                                                ( min_l +max_l + 5 ) * '-' ,
                                                ( skip_l       + 2 ) * '-' )
            fmt = '#   %%%ds | %%-%ds | %%%ds / %%-%ds | %%%ds / %%-%ds | %%-%ds   '  % (
                name_l ,
                desc_l ,
                mean_l ,
                rms_l  ,
                min_l  ,
                max_l  ,
                skip_l
                )

            report  = 'Dataset(%s) created:' % self.__name
            report += ' ' + allright ( '%s entries, %s variables' %  ( len ( self.__data ) , len ( self.variables ) ) )
            if self.trivial_vars : report += ' Vars:' + allright  ('trivial'       ) + ';'
            else                 : report += ' Vars:' + attention ('non-trivial'   ) + ';'
            if self.trivial_sel  : report += ' Cuts:' + allright  ('trivial'       ) + ';'
            else                 : report += ' Cuts:' + attention ('non-trivial'   ) + ';'
            if not self.__cuts   : report += ' '      + allright  ( 'no py-cuts'   )
            else                 : report += ' '      + attention ( 'with py-cuts' )

            header  = fmt % ( 'Variable'    ,
                              'Description' ,
                              'mean' ,
                              'rms'  ,
                              'min'  ,
                              'max'  ,
                              'skip' )
            report += '\n' + sep
            report += '\n' + header
            report += '\n' + sep
            for r in rows :
                line    =  fmt % ( r[0] , r[1] , r[2] , r[3] , r[4] , r[5] , attention ( r[6] ) )
                report += '\n' + line
            report += '\n' + sep
            self.__logger.info ( report )

        if not len ( self.__data ) :
            ## `values()` exists in both python2 and python3, `iteritems()` only in python2
            skip = sum ( self.__skip.values() )
            self.__logger.warning("Selector(%s): empty dataset! Total:%s/Processed:%s/Skipped:%d"
                                  % ( self.__name  , self.total , self.processed , skip ) )

        ## attention: delete these

        del self.__varset
        del self.__variables

        self.__varset     =  ()
        self.__variables  =  ()
Exemple #3
0
def _ds_table_0_(dataset, variables=[], cuts='', first=0, last=2**62):
    """Build a text table with per-variable statistics of the dataset

    - dataset   : the dataset (RooDataSet/RooDataHist-like object)
    - variables : variables to show: a string with names separated by
                  spaces, commas or semicolons, a single name/pattern,
                  or a sequence of names and/or variable objects;
                  all variables of the dataset are shown when empty
    - cuts      : selection passed to `dataset.statVar`
    - first     : first entry passed to `dataset.statVar`
    - last      : last  entry passed to `dataset.statVar`

    Returns the pair (table, width): the text and the length of the
    separator line. For invalid dataset the pair ('', 0) is returned,
    and if no variables are selected the width is fixed to 120.
    """
    varset = dataset.get()
    if not valid_pointer(varset):
        logger.error('Invalid dataset')
        ## keep the (text, width) shape used by all other return paths
        return '', 0

    if isinstance(variables, str):
        variables = variables.replace(',', ' ').replace(';', ' ').split()

    ## a single name (or pattern) is handled as a plain string;
    ## a single non-string item stays in the sequence
    if 1 == len(variables) and isinstance(variables[0], str):
        variables = variables[0]

    if isinstance(variables, str):

        if variables in varset:
            names = [variables]
        else:
            names = list(dataset.branches(variables))

    elif variables:
        ## strings are matched by name, anything else by membership
        wanted = set(v for v in variables if isinstance(v, str))
        others = [v for v in variables if not isinstance(v, str)]
        names = [i.GetName() for i in varset
                 if i.GetName() in wanted or i in others]
    else:
        names = [i.GetName() for i in varset]

    def _row(name, title, stat):
        """Format one table row from the statistics counter"""
        mnmx = stat.minmax()
        mean = stat.mean()
        rms = stat.rms()
        return (
            name,  ## 0
            title,  ## 1
            ('%+.5g' % mean.value()).strip(),  ## 2
            ('%.5g' % rms).strip(),  ## 3
            ('%+.5g' % mnmx[0]).strip(),  ## 4
            ('%+.5g' % mnmx[1]).strip())  ## 5

    #
    _vars = []
    for v in names:
        vv = getattr(varset, v)
        s = dataset.statVar(v, cuts, first, last)
        _vars.append(_row(vv.GetName(), vv.GetTitle(), s))

    _vars.sort()

    report = '# %s("%s","%s"):' % (dataset.__class__.__name__,
                                   dataset.GetName(), dataset.GetTitle())
    report += allright('%d entries, %d variables' %
                       (len(dataset), len(varset)))

    if not _vars:
        return report, 120

    weight = None
    if isinstance(dataset, ROOT.RooDataHist):
        if dataset.isNonPoissonWeighted():
            report += attention(' Binned/Weighted')
        else:
            report += allright(' Binned')
    elif dataset.isWeighted():

        if dataset.isNonPoissonWeighted(): report += attention(' Weighted')
        else: report += attention(' Weighted(Poisson)')

        dstmp = None
        wvar = None

        ## 1) try to get the name of the weight variable
        store = dataset.store()

        if not valid_pointer(store): store = None

        if store and not isinstance(store, ROOT.RooTreeDataStore):
            ## use an empty clone, converted to the tree-based storage
            dstmp = dataset.emptyClone()
            dstmp.convertToTreeStore()
            store = dstmp.store()
            if not valid_pointer(store): store = None

        if store and hasattr(store, 'tree') and valid_pointer(store.tree()):

            tree = store.tree()
            branches = set(tree.branches())
            vvars = set([i.GetName() for i in varset])
            wvars = branches - vvars

            ## the only branch that is not a dataset variable is taken as the weight
            if 1 == len(wvars):
                wvar = wvars.pop()

        if not wvar: wvar = Ostap.Utils.getWeight(dataset)
        if wvar: report += attention(' with "%s"' % wvar)

        store = None
        if dstmp:
            dstmp.reset()
            del dstmp
            dstmp = None

        ## 2) if weight name is known, try to get information about the weight
        if wvar:
            store = dataset.store()
            if not valid_pointer(store): store = None
            if store and not isinstance(store, ROOT.RooTreeDataStore):

                rargs = ROOT.RooFit.EventRange(first, last),
                if cuts:
                    ## need all variables
                    dstmp = dataset.reduce(ROOT.RooFit.Cut(cuts), *rargs)
                else:
                    ## enough to keep only 1 variable
                    vvs = ROOT.RooArgSet(varset[names[0]])
                    dstmp = dataset.reduce(ROOT.RooFit.SelectVars(vvs), *rargs)

                dstmp.convertToTreeStore()
                store = dstmp.store()
                ## cuts and range are already applied by `reduce`
                cuts, first, last = '', 0, 2**62

            if hasattr(store, 'tree') and valid_pointer(store.tree()):
                tree = store.tree()
                if wvar in tree.branches():
                    s = tree.statVar(wvar, cuts, first,
                                     last)  ## no cuts here...
                    weight = '*%s*' % wvar
                    ## appended after sorting: the weight is always the last row
                    _vars.append(_row(weight, 'Weight variable', s))

            store = None
            if dstmp is not None:
                dstmp.reset()
                del dstmp
                dstmp = None

    # ==============================================================================================
    # build the actual table
    # ==============================================================================================

    titles = ('Variable', 'Description', 'mean', 'rms', 'min', 'max')

    ## column widths: the longest entry, but not narrower than the title
    widths = [len(t) + 2 for t in titles]
    for row in _vars:
        widths = [max(w, len(c)) for w, c in zip(widths, row)]
    name_l, desc_l, mean_l, rms_l, min_l, max_l = widths

    sep = '# +%s+%s+%s+%s+' % ((name_l + 2) * '-', (desc_l + 2) * '-',
                               (mean_l + rms_l + 5) * '-',
                               (min_l + max_l + 5) * '-')
    fmt = '# | %%-%ds | %%-%ds | %%%ds / %%-%ds | %%%ds / %%-%ds |' % (
        name_l, desc_l, mean_l, rms_l, min_l, max_l)

    lines = [report, sep, fmt % titles, sep]

    ## regular variables first, the weight (if any) in a separate section
    regular = _vars[:-1] if weight else _vars
    lines.extend(fmt % row for row in regular)
    lines.append(sep)

    if weight:
        line = fmt % _vars[-1]
        lines.append(line.replace(weight, attention(weight)))
        lines.append(sep)

    return '\n'.join(lines), len(sep)