Example #1
0
    def graphs(self):
        """Get the dictionary of graphs
        """
        ## decoration of ROOT.TGraphAsymmErrors (item assignment)
        import ostap.histos.graphs

        result = {}

        with DBASE.open(self.dbase, 'r') as db:  ## READONLY

            for item in self.__vars:

                key = item[0]  ## address in the database
                funcs = db.get(key, ())
                if not funcs: continue

                ## one graph point per stored weighting function
                gr = ROOT.TGraphAsymmErrors(len(funcs))
                for idx, fun in enumerate(funcs):
                    if not hasattr(fun, 'stat'): continue

                    stat = fun.stat()
                    wmin, wmax = stat.minmax()
                    ## point: x=iteration, asymmetric errors from weight spread around 1
                    gr[idx] = idx, 0, 0, 1, 1 - wmin, wmax - 1

                result[key] = gr

        return result
Example #2
0
def runPidCalib(the_func, particle, stripping, polarity, trackcuts, **config):
    """ The basic function:
    - oversimplified version of MakePerfHistsRunRange.py script from Urania/PIDCalib 

    Parameters:
    - the_func   : user function passed through to makePlots
    - particle   : particle type (checked via CheckPartType)
    - stripping  : stripping version (checked via CheckStripVer)
    - polarity   : magnet polarity (checked via CheckMagPol)
    - trackcuts  : track selection cuts
    - config     : optional keys RunMin, RunMax, Verbose, MaxFiles, Parallel, dbname

    Returns the histograms produced by makePlots.
    """
    #
    ## perform some arguments check
    #

    ## 1) check the stripping version
    from PIDPerfScripts.DataFuncs import CheckStripVer
    CheckStripVer(stripping)

    ## 2) set the magnet polarity  [not-needed, since embedded into parser]
    from PIDPerfScripts.DataFuncs import CheckMagPol
    CheckMagPol(polarity)

    ## 3) set the particle name [not-needed, since embedded into parser]
    from PIDPerfScripts.DataFuncs import CheckPartType
    CheckPartType(particle)

    ## optional configuration with defaults
    runMin = config.get('RunMin', 0)
    runMax = config.get('RunMax', -1)
    verbose = config.get('Verbose', True)
    maxFiles = config.get('MaxFiles', -1)

    ## a bit strange treatment of runMax in PIDCalib :-(

    #
    ## finally call the standard PIDCalib machinery with user-specified function
    #
    histos = makePlots(the_func,
                       particle,
                       stripping,
                       polarity,
                       trackcuts,
                       runMin=runMin,
                       runMax=runMax,
                       verbose=verbose,
                       maxFiles=maxFiles,
                       parallel=config.get('Parallel', False))

    ## optionally store the results in a database (best-effort: failures are logged)
    if config.get('dbname', None):

        try:
            import ostap.io.zipshelve as DBASE
            with DBASE.open(config['dbname']) as db:
                if verbose: logger.info('Save data into %s' % config['dbname'])
                ##
                key = 'PIDCalib(%s)@Stripping%s/%s' % (particle, stripping,
                                                       polarity)
                db[key] = histos
                db[key + 'Cuts'] = trackcuts
                if verbose: db.ls()

        ## FIX: narrowed from a bare ``except:`` which also swallowed
        ## SystemExit/KeyboardInterrupt; DB failure stays non-fatal
        except Exception:
            logger.error('Unable to save data in DB')

    return histos
Example #3
0
    def __init__ ( self                   ,
                   dbase   = "weights.db" , ## the name of data base with the weights 
                   factors = ()           ) : ## NB: immutable default (was a mutable list)
        """Build the weighting object from the database of weights.
        - dbase   : name of the database with the weighting functions
        - factors : sequence of (accessor, address [, merge]) entries
        """
        #
        ## make some statistic
        #
        self._counter = SE ()
        self._nzeroes = 0 

        self.vars = [] 
        if not factors : return

        ## open database 
        with DBASE.open ( dbase , 'r' ) as db : ## READONLY
            
            for k in db :
                e = db[k]
                if hasattr ( e , '__len__' ) :  
                    logger.debug( 'DBASE "%.15s" key "%.15s" #%d' % ( dbase ,  k, len( e ) ) ) 
                
            ## loop over the weighting factors and build the function
            for f in factors :

                funval  = f[0]  ## accessor to the variable 
                funname = f[1]  ## address  in database 

                if isinstance ( funval , str ) :
                    ## FIX: bind the attribute name as a default argument;
                    ## a plain closure over the loop variable late-binds and
                    ## every string accessor would resolve to the LAST name
                    funval = lambda s , varnam = funval : getattr ( s , varnam )
                    
                ## 
                functions  = db.get ( funname , [] ) ## db[ funname ]
                if not functions :
                    logger.warning('No reweighting is available for %s, skip it' % funname )
                    ## FIX: actually skip it, as the message promises
                    continue

                merge = True
                if 2 < len ( f ) : merge = f[2] 
                
                if not isinstance (  functions , ( list , tuple ) ) :
                    functions = [ functions ]
                    
                ## merge list of functions into single function 
                if merge and 1 < len ( functions)  : 
                
                    single_func = functions[0] * functions [1] 
                    
                    for fun in functions [2:] :
                        single_func *= fun
                            
                    functions  = [ single_func ]
                    
                self.vars += [ ( funname , funval , functions , SE() ) ]  
Example #4
0
## ix , iy  = 60 , 40
## NOTE(review): ``ix''/``iy'' are used below but their visible assignment is
## commented out — presumably they are defined earlier in the file; confirm.
## 2D template histogram with uniform binning over [0,20]x[0,15]
hmc = h2_axes([20.0 / ix * i for i in range(ix + 1)],
              [15.0 / iy * i for i in range(iy + 1)])

## 1D template histograms (x over [0,20], y over [0,15])
ix, iy = 60, 36
hmcx = h1_axis([20.0 / ix * i for i in range(ix + 1)])
hmcy = h1_axis([15.0 / iy * i for i in range(iy + 1)])

## prepare re-weighting machinery
maxIter = 25  # maximal number of reweighting iterations

## check database: create an empty weights DB on first run, reuse otherwise
import os
if not os.path.exists(dbname):
    logger.info('Create new weights DBASE')
    db = DBASE.open(dbname, 'c')  ##  create new empty db
    db.close()
else:
    logger.info('Existing weights DBASE will be used')

# =============================================================================
## make reweighting iterations

from ostap.tools.reweight import Weight, makeWeights, WeightingPlot, W2Data
from ostap.fitting.selectors import SelectorWithVars, Variable
import ostap.parallel.parallel_fill

# =============================================================================
## configuration of reweighting
weightings = (
    ## variable          address in DB
Example #5
0
    def __init__ ( self                   ,
                   dbase   = "weights.db" , ## the name of data base with the weights 
                   factors = ()           ) : ## NB: immutable default (was a mutable list)
        """Build the weighting object from the database of weights.
        - dbase   : name of the database with the weighting functions
        - factors : sequence of weighting variables with
                    accessor/address/merge/skip attributes
        """
        #
        ## make some statistic
        #
        self.__counter = SE ()
        self.__nzeroes = 0 

        self.__vars    = [] 
        if not factors : return

        ## open database 
        with DBASE.open ( dbase , 'r' ) as db : ## READONLY
            
            for k in db :
                e = db[k]
                if hasattr ( e , '__len__' ) :  
                    logger.debug( "DBASE ``%.15s'' key ``%.15s'' #%d" % ( dbase ,  k, len( e ) ) ) 
                
            ## loop over the weighting factors and build the function
            for wvar in factors :

                funval  = wvar.accessor  ## accessor to the variable 
                funname = wvar.address   ## address  in database 
                merge   = wvar.merge     ## merge sequence of callables?
                skip    = wvar.skip      ## skip   some of them?
                
                if isinstance ( funval , str ) :
                    ## funval = operator.attrgetter( funval ) 
                    funval = AttrGetter( funval ) 
                    
                ## 
                functions  = db.get ( funname , [] ) ## db[ funname ]
                if not functions :
                    logger.warning("No reweighting is available for ``%s'', skip it" % funname )
                    continue
                                
                if not isinstance (  functions , ( list , tuple ) ) :
                    functions = [ functions ]                    
                
                flen = len(functions) 
                if   0 < skip and skip      < flen :
                    logger.info  ("Use only %d first iterations for ``%s''" % ( skip , funname ) )
                    functions = functions[:skip] 
                elif 0 > skip and abs(skip) < flen :
                    ## FIX: report a positive count, and really drop the LAST
                    ## |skip| entries ( ``[:-1*skip]'' kept the FIRST |skip| )
                    logger.info  ("Skip last %d iterations for ``%s''" % ( abs(skip) , funname ) )
                    functions = functions[:skip] 
                elif 0 == skip :
                    pass
                else :
                    logger.error("Invalid ``skip'' parameter %s/%d for ``%s''" % ( skip , flen , funname ) )
                
                ## nullify the uncertainties except for the last histogram
                _functions = []
                _first     = True 
                for f in reversed ( functions ) :
                    if isinstance ( f , ROOT.TH1 ) and _first : 
                        ff = f.clone()
                        for i in ff :
                            v     = float ( ff[i] )
                            ff[i] = VE(v,0)
                        _functions.append ( ff  )                        
                        _first = False 
                    else :
                        _functions.append ( f  )
                        
                _functions.reverse() 
                functions = _functions
                    
                ## merge list of functions into single function 
                if merge and 1 < len ( functions)  : 
                            
                    ## single_func = functions[0] * functions [1] 
                    single_func = MULT ( functions[0] , functions [1] )
                    
                    for fun in functions [2:] :

                        ## multiply it                               
                        ## single_func *= fun
                        single_func = MULT ( single_func , fun )
                            
                    functions  = [ single_func ]
                    
                self.__vars += [ ( funname , funval , functions , SE() ) ]
                
        self.__vars = tuple ( self.__vars ) 
Example #6
0
def makeWeights  ( dataset                 ,
                   plots    = ()           ,   ## NB: immutable default (was a mutable list)
                   database = "weights.db" ,
                   compare  = None         ,   ## comparison function 
                   delta    = 0.001        ,   ## delta for ``mean''  weigth variation
                   minmax   = 0.05         ,   ## delta for ``minmax'' weigth variation
                   power    = 0            ,   ## auto-determination
                   debug    = True         ) : ## save intermediate information in DB 
    """Perform one reweighting iteration:
    project the (MC) dataset for each plot, compare it with the reference
    ``data'' histogram, build the weight and update the database.
    Returns the number of ``active'' reweightings (still not converged).
    """
    assert 0 < delta  , "Reweighting: Invalid value for ``delta''  %s" % delta 
    assert 0 < minmax , "Reweighting: Invalid value for ``minmax'' %s" % minmax 

    ## auto-determine the ``power'' from the number of plots unless specified
    power   = power if power >= 1 else len ( plots ) 

    nplots  = len ( plots )
    ## several simultaneous reweightings: relax the convergence criteria
    if 1 < nplots :
        import  math
        fudge_factor = math.sqrt ( 1.0 / max ( 2.0 , nplots -  1.0 ) )
        delta   = delta  * fudge_factor
        minmax  = minmax * fudge_factor
        

    save_to_db = [] 
    ## number of active plots for reweighting
    active = 0
    ## loop over plots 
    for wplot in plots  :
        
        what    = wplot.what       ## variable/function to plot/compare 
        how     = wplot.how        ## weight and/or additional cuts 
        address = wplot.address    ## address in database 
        hdata0  = wplot.data       ## original "DATA" object 
        hmc0    = wplot.mc_histo   ## original "MC"   histogram 
        ww      = wplot.w          ## relative weight 
        #
        # normailze the data
        #
        hdata = hdata0
        if isinstance ( hdata , ROOT.TH1 ) :  hdata = hdata.density ()
        
        #
        ## make a plot on (MC) data with the weight
        # 
        dataset.project ( hmc0 , what , how )
        
        st   = hmc0.stat()
        mnmx = st.minmax()
        if iszero ( mnmx[0] ) :
            logger.warning ( "Reweighting: statistic goes to zero %s/``%s''" % ( st , address ) ) 
            
        #
        ## normalize MC
        #
        hmc = hmc0.density() 
        
        #
        ## calculate  the reweighting factor : a bit conservative (?)
        # 
        #  this is the only important line
        #
        #  try to exploit finer binning if possible

        if len ( hmc ) >= len( hdata )  : w =  ( 1.0 / hmc ) * hdata ## NB!      
        else                            : w =  hdata / hmc           ## NB!
        
        ## scale & get the statistics of weights 
        w   /= w.stat().mean().value()
        cnt  = w.stat()
        #
        wvar = cnt.rms()/cnt.mean()
        logger.info ( 'Reweighting: %24s: mean/(min,max):%20s/(%.3f,%.3f) RMS:%s[%%]' %
                      ( "``" + address + "''"  ,
                        cnt.mean().toString('(%.2f+-%.2f)') ,
                        cnt.minmax()[0] ,
                        cnt.minmax()[1] , (wvar * 100).toString('(%.2f+-%.2f)') ) ) 
        #
        ## make decision based on the variance of weights 
        #
        mnw , mxw = cnt.minmax()
        if wvar.value() <= delta and abs ( mxw - mnw ) <= minmax : ## small variance? 
            logger.info("Reweighting: No more reweights for ``%s'' [%.2f%%]/[(%+.1f,%+.1f)%%]" % \
                        ( address , wvar * 100 , ( mnw - 1 ) * 100 ,  ( mxw - 1 ) * 100 ) )
            del w , hdata , hmc 
        else :
            save_to_db.append ( ( address , ww , hdata0 , hmc0 , hdata , hmc , w ) ) 
        #
        ## make a comparison (if needed)
        # 
        if compare : compare ( hdata0 , hmc0 , address )

    
    ## for single reweighting 
    if 1 == nplots : power = 1
    
    if power != nplots :
        logger.info ( "Reweighting: ``power'' is %g/#%d"  % ( power , nplots  ) )

    active = len ( save_to_db )
    if active !=  nplots :
        logger.info ( "Reweighting: number of ``active'' reweights %s/#%d"  % ( active , nplots ) )
        if database and save_to_db : 
            power += ( nplots - active )
            logger.info  ("Reweighting: ``power'' is changed to %g" %  power ) 
    
    ## store the new weights into the database
    while database and save_to_db :

        entry = save_to_db.pop() 
        
        address, ww , hd0, hm0, hd , hm , weight = entry  

        ## dilute the weight: effective exponent per entry
        eff_exp = 1.0 / power
        if 1 != nplots and 1 != ww :
            eff_exp *= ww
            logger.info  ("Reweighting: apply ``effective exponent'' of %.3f for ``%s''" % ( eff_exp  , address ) )
            
        if 1 != eff_exp and 0 < eff_exp : 
            weight = weight ** eff_exp

        ## print 'WEIGHT stat', eff_exp, weight.stat()
        
        ## hmmmm... needed ? yes! 
        #if 1 < power : weight = weight ** ( 1.0 / power )
        
        ## relative importance
        #if 1 != ww :
        #    logger.info  ("Reweighting: apply ``relative importance factor'' of %.3g for ``'%s'" % ( ww , address ) )
        #    weight = weight ** ww 

        with DBASE.open ( database ) as db :
            
            db[address] = db.get( address , [] ) + [ weight ]
            
            if debug :
                addr        = address + ':REWEIGHTING'
                db [ addr ] = db.get ( addr , [] ) + list ( entry[2:] )
                
        ## release large intermediate objects promptly
        del hd0, hm0 , hd , hm , weight , entry 
        
    return active 
Example #7
0
def makeWeights(
        dataset,
        plots=(),  ## NB: immutable default (was a mutable list)
        database="weights.db",
        compare=None,  ## comparison function 
        delta=0.01,  ## delta for ``mean''  weight variation
        minmax=0.03,  ## delta for ``minmax'' weight variation
        power=None,  ## auto-determination
        debug=True,  ## save intermediate information in DB
        make_plots=False,  ## make plots 
        tag="Reweighting"):
    """The main  function: perform one re-weighting iteration 
    and reweight ``MC''-data set to looks as ``data''(reference) dataset
    >>> results = makeWeights (
    ... dataset           , ## data source to be  reweighted (DataSet, TTree, abstract source)
    ... plots             , ## reweighting plots
    ... database          , ## datadabse to store/update reweigting results
    ... delta             , ## stopping criteria for `mean`    weight variation
    ... minmax            , ## stopping criteria for `min/max` weight variation
    ... power             , ## effective power to apply to the weigths
    ... debug      = True , ## store debuig information in database
    ... make_plots = True , ## produce useful comparison plots
    ... tag        = 'RW' ) ## tag for better printout
    
    If `make_plots = False`,  it returns the tuple of active reweitings:
    >>> active        = makeWeights ( ... , make_plots = False , ... )
    
    Otherwise it also returns list of comparison plots 
    >>> active, cmp_plots = makeWeights ( ... , make_plots = True  , ... )
    >>> for item in  cmp_plots :
    ...    what    = item.what
    ...    hdata   = item.data
    ...    hmc     = item.mc
    ...    hweight = item.weight
    
    If no more rewighting iteratios required, <code>active</code> is an empty tuple 
    """

    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    nplots = len(plots)
    ## if 1 < nplots :
    ##     import  math
    ##     fudge_factor = math.sqrt ( 1.0 / max ( 2.0 , nplots -  1.0 ) )
    ##     delta   = delta  * fudge_factor
    ##     minmax  = minmax * fudge_factor

    ## list of plots to compare
    cmp_plots = []
    ## reweighting summary table
    header = ('Reweighting', 'wmin/wmax', 'OK?', 'wrms[%]', 'OK?', 'chi2/ndf',
              'ww', 'exp')

    rows = {}
    save_to_db = []
    ## number of active plots for reweighting
    for wplot in plots:

        what = wplot.what  ## variable/function to plot/compare
        how = wplot.how  ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data  ## original "DATA" object
        hmc0 = wplot.mc_histo  ## original "MC"   histogram
        ww = wplot.w  ## relative weight
        projector = wplot.projector  ## projector for MC data
        ignore = wplot.ignore  ## ignore for weigtht building?
        #
        # normalize the data
        #
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1): hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        hmc0 = projector(dataset, hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("%s: statistic goes to zero %s/``%s''" %
                           (tag, st, address))
        elif mnmx[0] <= 0:
            logger.warning("%s: statistic is negative  %s/``%s''" %
                           (tag, st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate  the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible
        hboth = isinstance(hmc, ROOT.TH1) and isinstance(hdata, ROOT.TH1)

        if   hboth and 1 == hmc.dim () and 1 == hdata.dim () and \
               len ( hmc ) >= len( hdata ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 2 == hmc.dim () and 2 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 3 == hmc.dim () and 3 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) and \
                 ( hmc.binsz() >= hdata.binsz() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        else:
            w = hdata / hmc  ## NB!

        # =====================================================================
        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?
        #

        ## chi2/ndf of the weights w.r.t. unity
        c2ndf = 0
        for i in w:
            c2ndf += w[i].chi2(1.0)
        ## FIX: guard against a single-bin weight object (was ZeroDivisionError)
        c2ndf /= max(len(w) - 1, 1)

        ## build  the row in the summary table
        row = address  ,  \
              '%-5.3f/%5.3f' % ( cnt.minmax()[0]    , cnt.minmax()[1] ) , \
              allright ( '+' ) if good2 else attention ( '-' ) , \
              (wvar * 100).toString('%6.2f+-%-6.2f') , \
              allright ( '+' ) if good1 else attention ( '-' ) , '%6.2f' % c2ndf

        ## make plots at the start of  each iteration?
        if make_plots:
            item = ComparisonPlot(what, hdata, hmc, w)
            cmp_plots.append(item)

        row = tuple(list(row) + ['%4.3f' % ww if 1 != ww else ''])

        rows[address] = row

        #
        ## make decision based on the variance of weights
        #
        mnw, mxw = cnt.minmax()
        if (not good) and (not ignore):  ## small variance?
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))

        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare: compare(hdata0, hmc0, address)

    active = tuple([p[0] for p in save_to_db])
    nactive = len(active)

    ## choose the effective exponent used to dilute the weights
    if power and callable(power):
        eff_exp = power(nactive)
    elif isinstance(power, num_types) and 0 < power <= 1.5:
        eff_exp = 1.0 * power
    elif 1 == nactive and 1 < len(plots):
        eff_exp = 0.95
    elif 1 == nactive:
        eff_exp = 1.00
    else:
        eff_exp = 1.10 / max(nactive, 1)

    ## store the new weights into the database
    ## NOTE(review): ``eff_exp *= ww'' below mutates eff_exp cumulatively
    ## across the popped entries — confirm this compounding is intended
    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        cnt = weight.stat()
        mnw, mxw = cnt.minmax()

        ## avoid too large or too small  weights
        for i in weight:
            w = weight[i]
            if w.value() < 0.5:
                weight[i] = VE(0.5, w.cov2())
            elif w.value() > 2.0:
                weight[i] = VE(2.0, w.cov2())

        if 1 < nactive and 1 != ww:
            eff_exp *= ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, eff_exp, address))

        if 1 != eff_exp and 0 < eff_exp:
            weight = weight**eff_exp
            row = list(rows[address])
            row.append('%4.3f' % eff_exp)
            rows[address] = tuple(row)

        with DBASE.open(database) as db:

            db[address] = db.get(address, []) + [weight]

            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        ## release large intermediate objects promptly
        del hd0, hm0, hd, hm, weight, entry

    ## print the summary table
    table = [header]
    for row in rows:
        table.append(rows[row])

    import ostap.logger.table as Table
    logger.info(
        '%s, active:#%d \n%s ' %
        (tag, nactive,
         Table.table(table, title=tag, prefix='# ', alignment='lccccccc')))

    cmp_plots = tuple(cmp_plots)
    return (active, cmp_plots) if make_plots else active
Example #8
0
    def __init__(
            self,
            dbase="weights.db",  ## the name of data base with the weights 
            factors=()):  ## NB: immutable default (was a mutable list)
        """Build the weighting object from the database of weights.
        - dbase   : name of the database with the weighting functions
        - factors : sequence of weighting variables with
                    accessor/address/merge/skip attributes
        """
        #
        ## make some statistic
        #
        self.__counter = SE()
        self.__nzeroes = 0

        self.__vars = []
        if not factors: return
        self.__dbase = dbase

        ## open database

        ## summary table: one row per weighting variable
        self.__table = [('Reweighting', 'accessor', '#', 'merged?', 'skip')]
        rows = []

        with DBASE.open(dbase, 'r') as db:  ## READONLY

            logger.debug('Reweigting database: \n%s' % db.table(prefix='# '))

            ## loop over the weighting factors and build the function
            for wvar in factors:

                funval = wvar.accessor  ## accessor to the variable
                funname = wvar.address  ## address  in database
                merge = wvar.merge  ## merge sequence of callables?
                skip = wvar.skip  ## skip   some of them?

                row = []

                row.append(funname)

                if isinstance(funval, str):
                    row.append(funval)
                    ## funval = operator.attrgetter( funval )
                    funval = AttrGetter(funval)
                elif isinstance(funval, AttrGetter):
                    atts = funval.attributes
                    if 1 == len(atts): atts = atts[0]
                    row.append(str(atts))
                else:
                    row.append('')

                ##
                functions = db.get(funname, [])  ## db[ funname ]
                if not functions:
                    logger.warning(
                        "No reweighting is available for ``%s'', skip it" %
                        funname)
                    continue

                if not isinstance(functions, (list, tuple)):
                    functions = [functions]

                flen = len(functions)
                if 0 < skip and skip < flen:
                    logger.info("Use only %d first iterations for ``%s''" %
                                (skip, funname))
                    functions = functions[:skip]
                elif 0 > skip and abs(skip) < flen:
                    ## FIX: report a positive count (skip is negative here)
                    logger.info("Skip last %d iterations for ``%s''" %
                                (abs(skip), funname))
                    functions = functions[:skip]
                elif 0 == skip:
                    pass
                else:
                    logger.error(
                        "Invalid ``skip'' parameter %s/%d for ``%s''" %
                        (skip, flen, funname))
                row.append('%d' % flen)

                ## nullify the uncertainties except for the last histogram
                _functions = []
                _first = True
                for f in reversed(functions):
                    if isinstance(f, ROOT.TH1) and _first:
                        ff = f.clone()
                        for i in ff:
                            v = float(ff[i])
                            ff[i] = VE(v, 0)
                        _functions.append(ff)
                        _first = False
                    else:
                        _functions.append(f)

                _functions.reverse()
                functions = _functions

                row.append('+' if merge else '-')
                row.append('%s' % skip)

                ## merge list of functions into single function
                if merge and 1 < len(functions):

                    ## single_func = functions[0] * functions [1]
                    single_func = MULT(functions[0], functions[1])

                    for fun in functions[2:]:

                        ## multiply it
                        ## single_func *= fun
                        single_func = MULT(single_func, fun)

                    functions = [single_func]

                self.__vars += [(funname, funval, functions, SE())]

                self.__table.append(row)

        self.__vars = tuple(self.__vars)
Example #9
0
def makeWeights(
        dataset,
        plots=(),  ## NB: immutable default (was a mutable list)
        database="weights.db",
        compare=None,  ## comparison function 
        delta=0.001,  ## delta for ``mean''  weight variation
        minmax=0.05,  ## delta for ``minmax'' weight variation
        power=0,  ## auto-determination
        debug=True,  ## save intermediate information in DB
        tag="Reweighting"):
    """Perform one reweighting iteration:
    project the (MC) dataset for each plot, compare with the reference
    ``data'' histogram, build the weight and update the database.
    Returns the list of addresses of still-``active'' reweightings.
    """
    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    ## auto-determine the ``power'' from the number of plots unless specified
    power = power if power >= 1 else len(plots)

    nplots = len(plots)
    ## several simultaneous reweightings: relax the convergence criteria
    if 1 < nplots:
        import math
        fudge_factor = math.sqrt(1.0 / max(2.0, nplots - 1.0))
        delta = delta * fudge_factor
        minmax = minmax * fudge_factor

    save_to_db = []
    ## number of active plots for reweighting
    for wplot in plots:

        what = wplot.what  ## variable/function to plot/compare
        how = wplot.how  ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data  ## original "DATA" object
        hmc0 = wplot.mc_histo  ## original "MC"   histogram
        ww = wplot.w  ## relative weight
        #
        # normailze the data
        #
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1): hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        dataset.project(hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("Reweighting: statistic goes to zero %s/``%s''" %
                           (st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate  the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible
        if   isinstance ( hmc   , ( ROOT.TH1F , ROOT.TH1D ) ) and \
           isinstance ( hdata , ( ROOT.TH1F , ROOT.TH1D ) )   and \
           len ( hmc ) >= len( hdata )                        :
            w = (1.0 / hmc) * hdata  ## NB!
            ## elif isinstance ( hmc   , ( ROOT.TH2F , ROOT.TH2D ) ) and \
            ##    isinstance ( hdata , ( ROOT.TH2F , ROOT.TH2D ) )   and \
            ##    len ( hmc.GetXaxis() ) >= len( hdata.GetXaxis () ) and \
            ##    len ( hmc.GetYaxis() ) >= len( hdata.GetYaxis () ) : w = ( 1.0 / hmc ) * hdata ## NB!
            ## elif isinstance ( hmc   , ( ROOT.TH3F , ROOT.TH3D ) ) and \
            ##    isinstance ( hdata , ( ROOT.TH3F , ROOT.TH3D ) )   and \
            ##    len ( hmc.GetXaxis() ) >= len( hdata.GetXaxis () ) and \
            ##    len ( hmc.GetYaxis() ) >= len( hdata.GetYaxis () ) and \
            ##    len ( hmc.GetZaxis() ) >= len( hdata.GetZaxis () ) : w = ( 1.0 / hmc ) * hdata ## NB!
        else:
            w = hdata / hmc  ## NB!

        # =====================================================================
        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?
        #
        afunc1 = allright if good1 else attention
        afunc2 = allright if good2 else attention
        #
        message = "%s: %24s:" % (tag, address)
        message += ' ' + 'mean=%12s' % cnt.mean().toString('(%4.2f+-%4.2f)')
        message += ' ' + afunc2('min/max=%-5.3f/%5.3f' %
                                (cnt.minmax()[0], cnt.minmax()[1]))
        message += ' ' + afunc1('rms=%s[%%]' %
                                (wvar * 100).toString('(%4.2f+-%4.2f)'))
        logger.info(message)
        #
        ## make decision based on the variance of weights
        #
        mnw, mxw = cnt.minmax()
        if good:  ## small variance?
            message = "%s: No more reweights for %s" % (tag, address)
            message += ' ' + allright("min/max/rms=%+3.1f/%+3.1f/%3.1f[%%]" %
                                      ((mnw - 1) * 100,
                                       (mxw - 1) * 100, 100 * wvar))
            logger.info(message)
            del w, hdata, hmc
        else:
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))
        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare: compare(hdata0, hmc0, address)

    ## for single reweighting
    ## if 1 == nplots : power = 1

    ## if power != nplots :
    #    logger.info ( "%s: ``power'' is %g/#%d"  % ( tag , power , nplots  ) )

    active = [p[0] for p in save_to_db]
    ## colorize/mark the active reweightings in the printout
    ## (local renamed from ``all'' -- it shadowed the builtin)
    labels = [p.address for p in plots]
    for i, a in enumerate(labels):
        if a in active:
            if isatty(): labels[i] = attention(a)
            else: labels[i] = '*' + a + '*'
        else:
            if isatty(): labels[i] = allright(a)

    logger.info("%s: reweights are: %s" % (tag, (', '.join(labels))))

    ## if len ( active ) != nplots :
    ##    if database and save_to_db :
    ##        power += ( nplots - len ( active ) )
    ##        logger.info  ("%s: ``power'' is changed to %g" %  ( tag , power ) )

    nactive = len(active)
    ## store the new weights into the database
    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        ## eff_exp = 1.0  / power
        ## eff_exp = 0.95 / ( 1.0 * nactive ) ** 0.5

        cnt = weight.stat()
        mnw, mxw = cnt.minmax()

        ## choose the effective exponent from the spread of the weights:
        ## small spread -> dilute less (or boost for a single reweighting)
        if 0.95 < mnw and mxw < 1.05:
            eff_exp = 0.75 if 1 < nactive else 1.50
        elif 0.90 < mnw and mxw < 1.10:
            eff_exp = 0.70 if 1 < nactive else 1.30
        elif 0.80 < mnw and mxw < 1.20:
            eff_exp = 0.65 if 1 < nactive else 1.25
        elif 0.70 < mnw and mxw < 1.30:
            eff_exp = 0.60 if 1 < nactive else 1.15
        elif 0.50 < mnw and mxw < 1.50:
            eff_exp = 0.55 if 1 < nactive else 1.10
        else:
            eff_exp = 0.50 if 1 < nactive else 1.0

        ## print 'effective exponent is:', eff_exp , address , mnw , mxw , (1.0/mnw)*mnw**eff_exp , (1.0/mxw)*mxw**eff_exp

        if 1 < nactive and 1 != ww:
            eff_exp *= ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, eff_exp, address))

        if 1 != eff_exp and 0 < eff_exp:
            weight = weight**eff_exp

        ## print 'WEIGHT stat', eff_exp, weight.stat()

        ## hmmmm... needed ? yes!
        #if 1 < power : weight = weight ** ( 1.0 / power )

        ## relative importance
        #if 1 != ww :
        #    logger.info  ("%s: apply ``relative importance factor'' of %.3g for ``'%s'" % ( tag , ww , address ) )
        #    weight = weight ** ww

        with DBASE.open(database) as db:

            db[address] = db.get(address, []) + [weight]

            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        ## release large intermediate objects promptly
        del hd0, hm0, hd, hm, weight, entry

    return active
Example #10
0
def test_shelves():
    """Exercise the shelve-like database backends (SQLite/zip/bz2/lzma/ROOT):

    - store the module-level ``data`` dictionary into each backend
    - reopen the databases read-only and compare the stored 1D/2D histograms
      against the module-level originals ``h1``/``h2``
    - clone every database and finally write into temporary databases

    Relies on module-level fixtures: ``data``, ``h1``, ``h2``, ``CU``,
    ``dbsize``, ``timing``, ``iszero``, ``logger`` and the *shelve modules
    (``lzshelve`` may be ``None`` when lzma support is unavailable).
    """
    ## temporary file names (cleaned up automatically by CU.CleanUp)
    db_sql_name  = CU.CleanUp.tempfile ( suffix = '.sqldb'  )  
    db_zip_name  = CU.CleanUp.tempfile ( suffix = '.zipdb'  ) 
    db_bz2_name  = CU.CleanUp.tempfile ( suffix = '.bz2db'  )
    db_root_name = CU.CleanUp.tempfile ( suffix = '.root'   )
    db_lz_name   = CU.CleanUp.tempfile ( suffix = '.lzmadb' )

    ## create ('c') the databases
    db_sql  = sqliteshelve.open ( db_sql_name  , 'c' )
    db_zip  = zipshelve.open    ( db_zip_name  , 'c' )
    db_bz2  = bz2shelve.open    ( db_bz2_name  , 'c' )
    db_root = rootshelve.open   ( db_root_name , 'c' )
    
    ## lzma support is optional
    if lzshelve : db_lz = lzshelve.open ( db_lz_name , 'c' )
    else        : db_lz = None  ## FIX: was a typo 'db_ls', leaving 'db_lz' unbound
        
    ## store every item in all backends
    for k in data :
        db_sql  [ k ] = data[k]
        db_zip  [ k ] = data[k]
        db_bz2  [ k ] = data[k]
        if lzshelve :
            db_lz  [ k ] = data[k]
        db_root [ k ] = data[k]
        
    ## report the number of stored keys per backend
    logger.info('SQLiteShelve #keys: %s' % len ( list ( db_sql .keys() ) ) ) 
    logger.info('ZipShelve    #keys: %s' % len ( db_zip .keys() ) )
    logger.info('Bz2Shelve    #keys: %s' % len ( db_bz2 .keys() ) )
    logger.info('RootShelve   #keys: %s' % len ( db_root.keys() ) )
    if lzshelve :
        logger.info('LzShelve     #keys: %s' % len ( db_lz .keys() ) )

    ## flush everything to disk
    db_sql  .close() 
    db_zip  .close()
    db_bz2  .close()
    db_root .close()
    if lzshelve : db_lz .close()

    ## report on-disk sizes (dbsize returns a pair for the '%d|%d' format)
    logger.info('SQLiteShelve size: %d|%d ' % dbsize ( db_sql_name  ) ) 
    logger.info('ZipShelve    size: %d|%d ' % dbsize ( db_zip_name  ) )   
    logger.info('Bz2Shelve    size: %d|%d ' % dbsize ( db_bz2_name  ) ) 
    logger.info('RootShelve   size: %d|%d'  % dbsize ( db_root_name ) )  
    if lzshelve :
        logger.info('LzShelve     size: %d|%d ' % dbsize ( db_lz_name    ) ) 
    
    ## reopen everything read-only ('r')
    db_sql  = sqliteshelve.open    ( db_sql_name  , 'r' )
    db_zip  = zipshelve.open       ( db_zip_name  , 'r' )
    db_bz2  = bz2shelve.open       ( db_bz2_name  , 'r' )
    if lzshelve :
        db_lz  = lzshelve.open     ( db_lz_name   , 'r' )
    db_root = rootshelve.open      ( db_root_name , 'r' )

    logger.info('SQLiteShelve #keys: %s' % len ( list ( db_sql .keys() ) ) ) 
    logger.info('ZipShelve    #keys: %s' % len ( db_zip .keys() ) )
    logger.info('Bz2Shelve    #keys: %s' % len ( db_bz2 .keys() ) )
    if lzshelve :
        logger.info('LzShelve     #keys: %s' % len ( db_lz  .keys() ) )
    logger.info('RootShelve   #keys: %s' % len ( db_root.keys() ) )

    ## timed retrieval of the stored objects from each backend
    with timing ( 'h2-read/SQL'  ) : h2_sql  = db_sql  [ 'histo-2D']
    with timing ( 'h2_read/ZIP'  ) : h2_zip  = db_zip  [ 'histo-2D']
    with timing ( 'h2_read/BZ2'  ) : h2_bz2  = db_bz2  [ 'histo-2D']
    if lzshelve :
        with timing ( 'h2_read/LZ'  ) :
            h2_lz = db_lz  [ 'histo-2D']
    with timing ( 'h2_read/ROOT' ) : h2_root = db_root [ 'histo-2D']

    with timing ( 'tu-read/SQL'  ) : tu_sql  = db_sql  [ 'both'    ]
    with timing ( 'tu_read/ZIP'  ) : tu_zip  = db_zip  [ 'both'    ] 
    with timing ( 'tu_read/BZ2'  ) : tu_bz2  = db_bz2  [ 'both'    ] 
    if lzshelve :
        with timing ( 'tu_read/LZ'   ) :
            tu_lz   = db_lz   [ 'both'    ] 
    with timing ( 'tu_read/ROOT' ) : tu_root = db_root [ 'both'    ]

    with timing ( 'h1-read/SQL'  ) : h1_sql  = db_sql  [ 'histo-1D']
    with timing ( 'h1-read/ZIP'  ) : h1_zip  = db_zip  [ 'histo-1D']
    with timing ( 'h1-read/BZ2'  ) : h1_bz2  = db_bz2  [ 'histo-1D']
    if lzshelve : 
        with timing ( 'h1-read/LZ'   ) :
            h1_lz   = db_lz   [ 'histo-1D']
    with timing ( 'h1-read/ROOT' ) : h1_root = db_root [ 'histo-1D']

    ## bin-by-bin comparison of the retrieved 1D histograms with the original h1
    for i in h1_sql : 
        v = h1_sql  [i] - h1_zip [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(1)!')
        v = h1_sql  [i] - h1     [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(2)!')
        v = h1_root [i] - h1     [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(3)!')
        v = h1_bz2  [i] - h1     [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(4)!')
        if lzshelve :
            v = h1_lz  [i] - h1     [i] 
            if not iszero ( v.value() ) :
                logger.error('Large difference for 1D histogram(5)!')
                
    ## bin-by-bin comparison of the retrieved 2D histograms with the original h2
    for i in h2_sql : 
        v = h2_sql  [i] - h2_zip[i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(1)!')
        v = h2_sql  [i] - h2    [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(2)!')
        v = h2_root [i] - h2    [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(3)!')
        v = h2_bz2  [i] - h2    [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(4)!')
        if lzshelve :
            v = h2_lz  [i] - h2    [i] 
            if not iszero ( v.value() ) :
                logger.error('Large difference for 2D histogram(5)!')
            
    ## sanity access to the stored tuple element
    h1tq = tu_sql [1]
    h1tz = tu_zip [1]
    h1tr = tu_root[1]

    ## clone them 
    dbs = [ db_sql , db_zip , db_bz2 , db_root ]
    if lzshelve : dbs.append ( db_lz )
    
    for db in dbs :
        cdb = db.clone ( CU.CleanUp.tempfile ( suffix = '.db'  ) )
        logger.info('Cloned:')
        cdb.ls()
    del dbs 
        
    ## timed close of every backend
    with timing('Close SQL'  ) : db_sql .close() 
    with timing('Close ZIP'  ) : db_zip .close()
    with timing('Close BZ2'  ) : db_bz2 .close()
    if lzshelve : 
        with timing('Close LZ'   ) : db_lz  .close()
    with timing('Close ROOT' ) : db_root.close()

    ## finally, the same exercise with temporary databases
    for dbase in ( sqliteshelve . tmpdb () ,
                   zipshelve    . tmpdb () ,
                   bz2shelve    . tmpdb () ,
                   ## lzshelve     . tmpdb () ,
                   rootshelve   . tmpdb () ) :

        with timing () :
            
            with dbase as db :
                
                db [ 'h1'    ] = h1
                db [ 'h2'    ] = h2
                db [ 'data'  ] = data
                db [ 'histos'] = data['histos']
                db.ls()
Example #11
0
def test_shelves():
    """Round-trip test for the SQLite/zip/ROOT shelve backends: store the
    module-level ``data`` dictionary, reopen the databases read-only,
    compare the retrieved histograms against the module-level originals
    ``h1``/``h2``, then repeat the storage exercise with temporary databases.
    """
    ## create the three databases
    sql_db  = sqliteshelve.open ( db_sql_name  , 'c' )
    zip_db  = zipshelve.open    ( db_zip_name  , 'c' )
    root_db = rootshelve.open   ( db_root_name , 'c' )

    ## store every item in all three backends
    for key in data :
        payload = data [ key ]
        sql_db  [ key ] = payload
        zip_db  [ key ] = payload
        root_db [ key ] = payload

    ## report the stored keys per backend
    for message , handle in ( ( 'SQLiteShelve keys: %s' , sql_db  ) ,
                              ( 'ZipShelve    keys: %s' , zip_db  ) ,
                              ( 'RootShelve   keys: %s' , root_db ) ) :
        logger.info ( message % list ( handle.keys () ) )

    ## flush everything to disk
    for handle in ( sql_db , zip_db , root_db ) : handle.close ()

    ## report on-disk sizes
    for message , path in ( ( 'SQLiteShelve size: %d ' , db_sql_name  ) ,
                            ( 'ZipShelve    size: %d ' , db_zip_name  ) ,
                            ( 'RootShelve   size: %d ' , db_root_name ) ) :
        logger.info ( message % os.path.getsize ( path ) )

    ## reopen everything read-only
    sql_db  = sqliteshelve.open ( db_sql_name  , 'r' )
    zip_db  = zipshelve.open    ( db_zip_name  , 'r' )
    root_db = rootshelve.open   ( db_root_name , 'r' )

    for message , handle in ( ( 'SQLiteShelve keys: %s' , sql_db  ) ,
                              ( 'ZipShelve    keys: %s' , zip_db  ) ,
                              ( 'RootShelve   keys: %s' , root_db ) ) :
        logger.info ( message % list ( handle.keys () ) )

    ## timed retrieval of the stored objects
    with timing ( 'h2-read/SQL'  ) : sql_h2  = sql_db  [ 'histo-2D' ]
    with timing ( 'h2_read/ZIP'  ) : zip_h2  = zip_db  [ 'histo-2D' ]
    with timing ( 'h2_read/ROOT' ) : root_h2 = root_db [ 'histo-2D' ]

    with timing ( 'tu-read/SQL'  ) : sql_tu  = sql_db  [ 'both' ]
    with timing ( 'tu_read/ZIP'  ) : zip_tu  = zip_db  [ 'both' ]
    with timing ( 'tu_read/ROOT' ) : root_tu = root_db [ 'both' ]

    with timing ( 'h1-read/SQL'  ) : sql_h1  = sql_db  [ 'histo-1D' ]
    with timing ( 'h1-read/ZIP'  ) : zip_h1  = zip_db  [ 'histo-1D' ]
    with timing ( 'h1-read/ROOT' ) : root_h1 = root_db [ 'histo-1D' ]

    ## bin-by-bin comparison of the 1D histograms against the original h1
    for idx in sql_h1 :
        for num , pair in enumerate ( ( ( sql_h1  [ idx ] , zip_h1 [ idx ] ) ,
                                        ( sql_h1  [ idx ] , h1     [ idx ] ) ,
                                        ( root_h1 [ idx ] , h1     [ idx ] ) ) ,
                                      start = 1 ) :
            diff = pair [ 0 ] - pair [ 1 ]
            if not iszero ( diff.value () ) :
                logger.error ( 'Large difference for 1D histogram(%d)!' % num )

    ## bin-by-bin comparison of the 2D histograms against the original h2
    for idx in sql_h2 :
        for num , pair in enumerate ( ( ( sql_h2  [ idx ] , zip_h2 [ idx ] ) ,
                                        ( sql_h2  [ idx ] , h2     [ idx ] ) ,
                                        ( root_h2 [ idx ] , h2     [ idx ] ) ) ,
                                      start = 1 ) :
            diff = pair [ 0 ] - pair [ 1 ]
            if not iszero ( diff.value () ) :
                logger.error ( 'Large difference for 2D histogram(%d)!' % num )

    ## sanity access to the stored tuple element
    h1tq = sql_tu  [ 1 ]
    h1tz = zip_tu  [ 1 ]
    h1tr = root_tu [ 1 ]

    ## timed close of every backend
    for label , handle in ( ( 'Close SQL'  , sql_db  ) ,
                            ( 'Close ZIP'  , zip_db  ) ,
                            ( 'Close ROOT' , root_db ) ) :
        with timing ( label ) : handle.close ()

    ## timed removal of the database files
    for label , path in ( ( 'Remove SQL'  , db_sql_name  ) ,
                          ( 'Remove ZIP'  , db_zip_name  ) ,
                          ( 'Remove ROOT' , db_root_name ) ) :
        with timing ( label ) : os.remove ( path )

    ## the same storage exercise with temporary databases
    for tmp in ( sqliteshelve.tmpdb () ,
                 zipshelve   .tmpdb () ,
                 rootshelve  .tmpdb () ) :

        with tmp as db :

            db [ 'h1' ] = h1
            db [ 'h2' ] = h2
            db.ls ()
Example #12
0
def makeWeights  ( dataset                 ,    ## (MC) dataset with a 'project' method
                   plots    = ()           ,    ## plot specs: (what, how, address, hdata [, hmc])
                   database = "weights.db" ,    ## weights database to update
                   compare  = None         ,    ## comparison function 
                   delta    = 0.001        ,    ## delta for weight variance 
                   debug    = True         ) :  ## save intermediate information in DB 
    """Perform one iteration of the iterative reweighting procedure.

    For each entry in ``plots`` the MC ``dataset`` is projected, the
    data/MC ratio is turned into a (damped) weight object and appended to
    ``database`` under the entry's address.  When ``compare`` is given it
    is invoked as ``compare(hdata0, hmc0, address)`` for monitoring.

    Returns ``True`` when at least one entry still has a weight variance
    above ``delta/len(plots)`` and further iterations are needed.
    """
    more = False

    ## calculate the reweighting damping factor: a bit conservative (?)
    ## NB: loop-invariant, hoisted out of the loop
    power = min ( 2.0 , len ( plots ) )                   ## NB!

    ## loop over plots 
    for r in plots  :

        what    = r [0]         ## variable/function to plot/compare 
        how     = r [1]         ## weight or additional cuts 
        address = r [2]         ## address in database 
        hdata0  = r [3]                          .clone () ## original "DATA" histogram
        hmc0    = r [4] if 4 < len(r) else hdata0.clone () ## original "MC"   histogram 

        #
        ## black magic to take into account the difference in bins and normalizations
        #
        hdata = hdata0 
        if hasattr ( hdata , 'rescale_bins' ) : 
            hdata = hdata.rescale_bins ( 1.0   )
            
        ## normalize the data:
        hmean = None 
        if hasattr ( hdata , 'mean' ) and hasattr ( hdata , '__idiv__' ) :

            ## normalization point
            hmean  = hdata.mean()
            #  2D means are coordinate pairs and must be unpacked
            if isinstance ( hdata , ROOT.TH2 ) : hdata /= hdata ( *hmean )
            else                               : hdata /= hdata (  hmean )

        #
        ## make a plot on (MC) data with the weight
        # 
        dataset.project ( hmc0 , what , how )

        st   = hmc0.stat()
        mnmx = st.minmax()
        if iszero ( mnmx[0] ) :
            logger.warning ( 'Statistic goes to zero %s/"%s"' % ( st , address ) ) 
            
        #
        ## black magic to take into account the difference in bins and normalizations
        # 
        hmc = hmc0.rescale_bins ( 1.0 )
        
        ## normalize MC at the same point as data
        if hmean is None : pass
        else             :
            if isinstance ( hmc , ROOT.TH2 ) : hmc /= hmc ( *hmean )
            else                             : hmc /= hmc (  hmean )

        #  this is the only important line:
        #  try to exploit finer binning if possible 
        if len ( hmc ) >= len( hdata )  : 
            w     = ( ( 1.0   / hmc ) * hdata ) ** ( 1.0 / power )  ## NB!
        else :
            w     = ( ( hdata / hmc )         ) ** ( 1.0 / power )  ## NB!
            
        #
        ## get the statistics of weights 
        #
        cnt  = w.stat()
        mnmx = cnt.minmax()
        ## re-center the weights around unity when 1 is outside [min,max]
        if not mnmx [0] <= 1 <= mnmx[1] : w /= cnt.mean().value()
        cnt  = w.stat()
        #
        ## relative spread of the weights 
        wvar = cnt.rms()/cnt.mean()
        logger.info ( 'Reweighting "%-.15s: Mean/minmax:%s/(%.4f,%.4f) Vars:%s[%%]' %
                      ( address         ,
                        cnt.mean()      ,
                        cnt.minmax()[0] ,
                        cnt.minmax()[1] , wvar * 100 ) ) 
        #
        ## make decision based on variance of weights 
        #
        if wvar.value() <= delta / len ( plots ) : ## small variance? 
            save = False
            logger.info("No more reweighting for %s [%.3f%%]" %  ( address , wvar * 100 ) ) 
        else            :
            save = True 

        #
        ## make a comparison (if needed)
        # 
        if compare :
            compare ( hdata0 , hmc0 , address )
        
        ## update data base 
        if save and database and address :
            with DBASE.open ( database ) as db :

                db[address] = db.get( address , [] ) + [ w ]
                
                if debug :
                    addr        = address + ':REWEIGHTING'
                    entry       = ( hdata0 , hmc0 , hdata , hmc , w ) 
                    db [ addr ] = db.get ( addr , [] ) + [ entry ]
                    
        ## at least one plot still needs reweighting?
        more = more or save

        ## release the temporaries before the next iteration 
        del hdata0, hmc0, hdata, hmc, w  
        
    return more