Example #1
0
    def graphs(self):
        """Get the dictionary of graphs
        """
        ## decoration of ROOT.TGraphAsymmErrors (item assignment)
        import ostap.histos.graphs

        result = {}

        with DBASE.open(self.dbase, 'r') as db:  ## READONLY

            for item in self.__vars:

                key = item[0]  ## address in the database
                funcs = db.get(key, ())
                if not funcs: continue

                ## one graph point per stored weighting function
                gr = ROOT.TGraphAsymmErrors(len(funcs))
                for idx, fun in enumerate(funcs):
                    if not hasattr(fun, 'stat'): continue

                    stat = fun.stat()
                    wmin, wmax = stat.minmax()
                    ## point: x=iteration, asymmetric errors from weight spread around 1
                    gr[idx] = idx, 0, 0, 1, 1 - wmin, wmax - 1

                result[key] = gr

        return result
Example #2
0
def runPidCalib(the_func, particle, stripping, polarity, trackcuts, **config):
    """ The basic function:
    - oversimplified version of MakePerfHistsRunRange.py script from Urania/PIDCalib 

    Parameters:
    - the_func   : user function passed through to makePlots
    - particle   : particle type (checked via CheckPartType)
    - stripping  : stripping version (checked via CheckStripVer)
    - polarity   : magnet polarity (checked via CheckMagPol)
    - trackcuts  : track selection cuts
    - config     : optional keys RunMin, RunMax, Verbose, MaxFiles, Parallel, dbname

    Returns the histograms produced by makePlots.
    """
    #
    ## perform some arguments check
    #

    ## 1) check the stripping version
    from PIDPerfScripts.DataFuncs import CheckStripVer
    CheckStripVer(stripping)

    ## 2) set the magnet polarity  [not-needed, since embedded into parser]
    from PIDPerfScripts.DataFuncs import CheckMagPol
    CheckMagPol(polarity)

    ## 3) set the particle name [not-needed, since embedded into parser]
    from PIDPerfScripts.DataFuncs import CheckPartType
    CheckPartType(particle)

    ## optional configuration with defaults
    runMin = config.get('RunMin', 0)
    runMax = config.get('RunMax', -1)
    verbose = config.get('Verbose', True)
    maxFiles = config.get('MaxFiles', -1)

    ## a bit strange treatment of runMax in PIDCalib :-(

    #
    ## finally call the standard PIDCalib machinery with user-specified function
    #
    histos = makePlots(the_func,
                       particle,
                       stripping,
                       polarity,
                       trackcuts,
                       runMin=runMin,
                       runMax=runMax,
                       verbose=verbose,
                       maxFiles=maxFiles,
                       parallel=config.get('Parallel', False))

    ## optionally store the results in a database (best-effort: failures are logged)
    if config.get('dbname', None):

        try:
            import ostap.io.zipshelve as DBASE
            with DBASE.open(config['dbname']) as db:
                if verbose: logger.info('Save data into %s' % config['dbname'])
                ##
                key = 'PIDCalib(%s)@Stripping%s/%s' % (particle, stripping,
                                                       polarity)
                db[key] = histos
                db[key + 'Cuts'] = trackcuts
                if verbose: db.ls()

        ## FIX: narrowed from a bare ``except:`` which also swallowed
        ## SystemExit/KeyboardInterrupt; DB failure stays non-fatal
        except Exception:
            logger.error('Unable to save data in DB')

    return histos
Example #3
0
    def __init__ ( self                   ,
                   dbase   = "weights.db" , ## the name of data base with the weights 
                   factors = ()           ) : ## NB: immutable default (was a mutable list)
        """Build the weighting object from the database of weights.
        - dbase   : name of the database with the weighting functions
        - factors : sequence of (accessor, address [, merge]) entries
        """
        #
        ## make some statistic
        #
        self._counter = SE ()
        self._nzeroes = 0 

        self.vars = [] 
        if not factors : return

        ## open database 
        with DBASE.open ( dbase , 'r' ) as db : ## READONLY
            
            for k in db :
                e = db[k]
                if hasattr ( e , '__len__' ) :  
                    logger.debug( 'DBASE "%.15s" key "%.15s" #%d' % ( dbase ,  k, len( e ) ) ) 
                
            ## loop over the weighting factors and build the function
            for f in factors :

                funval  = f[0]  ## accessor to the variable 
                funname = f[1]  ## address  in database 

                if isinstance ( funval , str ) :
                    ## FIX: bind the attribute name as a default argument;
                    ## a plain closure over the loop variable late-binds and
                    ## every string accessor would resolve to the LAST name
                    funval = lambda s , varnam = funval : getattr ( s , varnam )
                    
                ## 
                functions  = db.get ( funname , [] ) ## db[ funname ]
                if not functions :
                    logger.warning('No reweighting is available for %s, skip it' % funname )
                    ## FIX: actually skip it, as the message promises
                    continue

                merge = True
                if 2 < len ( f ) : merge = f[2] 
                
                if not isinstance (  functions , ( list , tuple ) ) :
                    functions = [ functions ]
                    
                ## merge list of functions into single function 
                if merge and 1 < len ( functions)  : 
                
                    single_func = functions[0] * functions [1] 
                    
                    for fun in functions [2:] :
                        single_func *= fun
                            
                    functions  = [ single_func ]
                    
                self.vars += [ ( funname , funval , functions , SE() ) ]  
Example #4
0
## ix , iy  = 60 , 40
## NOTE(review): ``ix''/``iy'' are used below but their visible assignment is
## commented out — presumably they are defined earlier in the file; confirm.
## 2D template histogram with uniform binning over [0,20]x[0,15]
hmc = h2_axes([20.0 / ix * i for i in range(ix + 1)],
              [15.0 / iy * i for i in range(iy + 1)])

## 1D template histograms (x over [0,20], y over [0,15])
ix, iy = 60, 36
hmcx = h1_axis([20.0 / ix * i for i in range(ix + 1)])
hmcy = h1_axis([15.0 / iy * i for i in range(iy + 1)])

## prepare re-weighting machinery
maxIter = 25  # maximal number of reweighting iterations

## check database: create an empty weights DB on first run, reuse otherwise
import os
if not os.path.exists(dbname):
    logger.info('Create new weights DBASE')
    db = DBASE.open(dbname, 'c')  ##  create new empty db
    db.close()
else:
    logger.info('Existing weights DBASE will be used')

# =============================================================================
## make reweighting iterations

from ostap.tools.reweight import Weight, makeWeights, WeightingPlot, W2Data
from ostap.fitting.selectors import SelectorWithVars, Variable
import ostap.parallel.parallel_fill

# =============================================================================
## configuration of reweighting
weightings = (
    ## variable          address in DB
Example #5
0
    def __init__ ( self                   ,
                   dbase   = "weights.db" , ## the name of data base with the weights 
                   factors = ()           ) : ## NB: immutable default (was a mutable list)
        """Build the weighting object from the database of weights.
        - dbase   : name of the database with the weighting functions
        - factors : sequence of weighting variables with
                    accessor/address/merge/skip attributes
        """
        #
        ## make some statistic
        #
        self.__counter = SE ()
        self.__nzeroes = 0 

        self.__vars    = [] 
        if not factors : return

        ## open database 
        with DBASE.open ( dbase , 'r' ) as db : ## READONLY
            
            for k in db :
                e = db[k]
                if hasattr ( e , '__len__' ) :  
                    logger.debug( "DBASE ``%.15s'' key ``%.15s'' #%d" % ( dbase ,  k, len( e ) ) ) 
                
            ## loop over the weighting factors and build the function
            for wvar in factors :

                funval  = wvar.accessor  ## accessor to the variable 
                funname = wvar.address   ## address  in database 
                merge   = wvar.merge     ## merge sequence of callables?
                skip    = wvar.skip      ## skip   some of them?
                
                if isinstance ( funval , str ) :
                    ## funval = operator.attrgetter( funval ) 
                    funval = AttrGetter( funval ) 
                    
                ## 
                functions  = db.get ( funname , [] ) ## db[ funname ]
                if not functions :
                    logger.warning("No reweighting is available for ``%s'', skip it" % funname )
                    continue
                                
                if not isinstance (  functions , ( list , tuple ) ) :
                    functions = [ functions ]                    
                
                flen = len(functions) 
                if   0 < skip and skip      < flen :
                    logger.info  ("Use only %d first iterations for ``%s''" % ( skip , funname ) )
                    functions = functions[:skip] 
                elif 0 > skip and abs(skip) < flen :
                    ## FIX: report a positive count, and really drop the LAST
                    ## |skip| entries ( ``[:-1*skip]'' kept the FIRST |skip| )
                    logger.info  ("Skip last %d iterations for ``%s''" % ( abs(skip) , funname ) )
                    functions = functions[:skip] 
                elif 0 == skip :
                    pass
                else :
                    logger.error("Invalid ``skip'' parameter %s/%d for ``%s''" % ( skip , flen , funname ) )
                
                ## nullify the uncertainties except for the last histogram
                _functions = []
                _first     = True 
                for f in reversed ( functions ) :
                    if isinstance ( f , ROOT.TH1 ) and _first : 
                        ff = f.clone()
                        for i in ff :
                            v     = float ( ff[i] )
                            ff[i] = VE(v,0)
                        _functions.append ( ff  )                        
                        _first = False 
                    else :
                        _functions.append ( f  )
                        
                _functions.reverse() 
                functions = _functions
                    
                ## merge list of functions into single function 
                if merge and 1 < len ( functions)  : 
                            
                    ## single_func = functions[0] * functions [1] 
                    single_func = MULT ( functions[0] , functions [1] )
                    
                    for fun in functions [2:] :

                        ## multiply it                               
                        ## single_func *= fun
                        single_func = MULT ( single_func , fun )
                            
                    functions  = [ single_func ]
                    
                self.__vars += [ ( funname , funval , functions , SE() ) ]
                
        self.__vars = tuple ( self.__vars ) 
Example #6
0
def makeWeights  ( dataset                 ,
                   plots    = ()           ,   ## NB: immutable default (was a mutable list)
                   database = "weights.db" ,
                   compare  = None         ,   ## comparison function 
                   delta    = 0.001        ,   ## delta for ``mean''  weigth variation
                   minmax   = 0.05         ,   ## delta for ``minmax'' weigth variation
                   power    = 0            ,   ## auto-determination
                   debug    = True         ) : ## save intermediate information in DB 
    """Perform one reweighting iteration:
    project the (MC) dataset for each plot, compare it with the reference
    ``data'' histogram, build the weight and update the database.
    Returns the number of ``active'' reweightings (still not converged).
    """
    assert 0 < delta  , "Reweighting: Invalid value for ``delta''  %s" % delta 
    assert 0 < minmax , "Reweighting: Invalid value for ``minmax'' %s" % minmax 

    ## auto-determine the ``power'' from the number of plots unless specified
    power   = power if power >= 1 else len ( plots ) 

    nplots  = len ( plots )
    ## several simultaneous reweightings: relax the convergence criteria
    if 1 < nplots :
        import  math
        fudge_factor = math.sqrt ( 1.0 / max ( 2.0 , nplots -  1.0 ) )
        delta   = delta  * fudge_factor
        minmax  = minmax * fudge_factor
        

    save_to_db = [] 
    ## number of active plots for reweighting
    active = 0
    ## loop over plots 
    for wplot in plots  :
        
        what    = wplot.what       ## variable/function to plot/compare 
        how     = wplot.how        ## weight and/or additional cuts 
        address = wplot.address    ## address in database 
        hdata0  = wplot.data       ## original "DATA" object 
        hmc0    = wplot.mc_histo   ## original "MC"   histogram 
        ww      = wplot.w          ## relative weight 
        #
        # normailze the data
        #
        hdata = hdata0
        if isinstance ( hdata , ROOT.TH1 ) :  hdata = hdata.density ()
        
        #
        ## make a plot on (MC) data with the weight
        # 
        dataset.project ( hmc0 , what , how )
        
        st   = hmc0.stat()
        mnmx = st.minmax()
        if iszero ( mnmx[0] ) :
            logger.warning ( "Reweighting: statistic goes to zero %s/``%s''" % ( st , address ) ) 
            
        #
        ## normalize MC
        #
        hmc = hmc0.density() 
        
        #
        ## calculate  the reweighting factor : a bit conservative (?)
        # 
        #  this is the only important line
        #
        #  try to exploit finer binning if possible

        if len ( hmc ) >= len( hdata )  : w =  ( 1.0 / hmc ) * hdata ## NB!      
        else                            : w =  hdata / hmc           ## NB!
        
        ## scale & get the statistics of weights 
        w   /= w.stat().mean().value()
        cnt  = w.stat()
        #
        wvar = cnt.rms()/cnt.mean()
        logger.info ( 'Reweighting: %24s: mean/(min,max):%20s/(%.3f,%.3f) RMS:%s[%%]' %
                      ( "``" + address + "''"  ,
                        cnt.mean().toString('(%.2f+-%.2f)') ,
                        cnt.minmax()[0] ,
                        cnt.minmax()[1] , (wvar * 100).toString('(%.2f+-%.2f)') ) ) 
        #
        ## make decision based on the variance of weights 
        #
        mnw , mxw = cnt.minmax()
        if wvar.value() <= delta and abs ( mxw - mnw ) <= minmax : ## small variance? 
            logger.info("Reweighting: No more reweights for ``%s'' [%.2f%%]/[(%+.1f,%+.1f)%%]" % \
                        ( address , wvar * 100 , ( mnw - 1 ) * 100 ,  ( mxw - 1 ) * 100 ) )
            del w , hdata , hmc 
        else :
            save_to_db.append ( ( address , ww , hdata0 , hmc0 , hdata , hmc , w ) ) 
        #
        ## make a comparison (if needed)
        # 
        if compare : compare ( hdata0 , hmc0 , address )

    
    ## for single reweighting 
    if 1 == nplots : power = 1
    
    if power != nplots :
        logger.info ( "Reweighting: ``power'' is %g/#%d"  % ( power , nplots  ) )

    active = len ( save_to_db )
    if active !=  nplots :
        logger.info ( "Reweighting: number of ``active'' reweights %s/#%d"  % ( active , nplots ) )
        if database and save_to_db : 
            power += ( nplots - active )
            logger.info  ("Reweighting: ``power'' is changed to %g" %  power ) 
    
    ## store the new weights into the database
    while database and save_to_db :

        entry = save_to_db.pop() 
        
        address, ww , hd0, hm0, hd , hm , weight = entry  

        ## dilute the weight: effective exponent per entry
        eff_exp = 1.0 / power
        if 1 != nplots and 1 != ww :
            eff_exp *= ww
            logger.info  ("Reweighting: apply ``effective exponent'' of %.3f for ``%s''" % ( eff_exp  , address ) )
            
        if 1 != eff_exp and 0 < eff_exp : 
            weight = weight ** eff_exp

        ## print 'WEIGHT stat', eff_exp, weight.stat()
        
        ## hmmmm... needed ? yes! 
        #if 1 < power : weight = weight ** ( 1.0 / power )
        
        ## relative importance
        #if 1 != ww :
        #    logger.info  ("Reweighting: apply ``relative importance factor'' of %.3g for ``'%s'" % ( ww , address ) )
        #    weight = weight ** ww 

        with DBASE.open ( database ) as db :
            
            db[address] = db.get( address , [] ) + [ weight ]
            
            if debug :
                addr        = address + ':REWEIGHTING'
                db [ addr ] = db.get ( addr , [] ) + list ( entry[2:] )
                
        ## release large intermediate objects promptly
        del hd0, hm0 , hd , hm , weight , entry 
        
    return active 
Example #7
0
def makeWeights(
        dataset,
        plots=(),  ## NB: immutable default (was a mutable list)
        database="weights.db",
        compare=None,  ## comparison function 
        delta=0.01,  ## delta for ``mean''  weight variation
        minmax=0.03,  ## delta for ``minmax'' weight variation
        power=None,  ## auto-determination
        debug=True,  ## save intermediate information in DB
        make_plots=False,  ## make plots 
        tag="Reweighting"):
    """The main  function: perform one re-weighting iteration 
    and reweight ``MC''-data set to looks as ``data''(reference) dataset
    >>> results = makeWeights (
    ... dataset           , ## data source to be  reweighted (DataSet, TTree, abstract source)
    ... plots             , ## reweighting plots
    ... database          , ## datadabse to store/update reweigting results
    ... delta             , ## stopping criteria for `mean`    weight variation
    ... minmax            , ## stopping criteria for `min/max` weight variation
    ... power             , ## effective power to apply to the weigths
    ... debug      = True , ## store debuig information in database
    ... make_plots = True , ## produce useful comparison plots
    ... tag        = 'RW' ) ## tag for better printout
    
    If `make_plots = False`,  it returns the tuple of active reweitings:
    >>> active        = makeWeights ( ... , make_plots = False , ... )
    
    Otherwise it also returns list of comparison plots 
    >>> active, cmp_plots = makeWeights ( ... , make_plots = True  , ... )
    >>> for item in  cmp_plots :
    ...    what    = item.what
    ...    hdata   = item.data
    ...    hmc     = item.mc
    ...    hweight = item.weight
    
    If no more rewighting iteratios required, <code>active</code> is an empty tuple 
    """

    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    nplots = len(plots)
    ## if 1 < nplots :
    ##     import  math
    ##     fudge_factor = math.sqrt ( 1.0 / max ( 2.0 , nplots -  1.0 ) )
    ##     delta   = delta  * fudge_factor
    ##     minmax  = minmax * fudge_factor

    ## list of plots to compare
    cmp_plots = []
    ## reweighting summary table
    header = ('Reweighting', 'wmin/wmax', 'OK?', 'wrms[%]', 'OK?', 'chi2/ndf',
              'ww', 'exp')

    rows = {}
    save_to_db = []
    ## number of active plots for reweighting
    for wplot in plots:

        what = wplot.what  ## variable/function to plot/compare
        how = wplot.how  ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data  ## original "DATA" object
        hmc0 = wplot.mc_histo  ## original "MC"   histogram
        ww = wplot.w  ## relative weight
        projector = wplot.projector  ## projector for MC data
        ignore = wplot.ignore  ## ignore for weigtht building?
        #
        # normalize the data
        #
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1): hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        hmc0 = projector(dataset, hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("%s: statistic goes to zero %s/``%s''" %
                           (tag, st, address))
        elif mnmx[0] <= 0:
            logger.warning("%s: statistic is negative  %s/``%s''" %
                           (tag, st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate  the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible
        hboth = isinstance(hmc, ROOT.TH1) and isinstance(hdata, ROOT.TH1)

        if   hboth and 1 == hmc.dim () and 1 == hdata.dim () and \
               len ( hmc ) >= len( hdata ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 2 == hmc.dim () and 2 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 3 == hmc.dim () and 3 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) and \
                 ( hmc.binsz() >= hdata.binsz() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        else:
            w = hdata / hmc  ## NB!

        # =====================================================================
        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?
        #

        ## chi2/ndf of the weights w.r.t. unity
        c2ndf = 0
        for i in w:
            c2ndf += w[i].chi2(1.0)
        ## FIX: guard against a single-bin weight object (was ZeroDivisionError)
        c2ndf /= max(len(w) - 1, 1)

        ## build  the row in the summary table
        row = address  ,  \
              '%-5.3f/%5.3f' % ( cnt.minmax()[0]    , cnt.minmax()[1] ) , \
              allright ( '+' ) if good2 else attention ( '-' ) , \
              (wvar * 100).toString('%6.2f+-%-6.2f') , \
              allright ( '+' ) if good1 else attention ( '-' ) , '%6.2f' % c2ndf

        ## make plots at the start of  each iteration?
        if make_plots:
            item = ComparisonPlot(what, hdata, hmc, w)
            cmp_plots.append(item)

        row = tuple(list(row) + ['%4.3f' % ww if 1 != ww else ''])

        rows[address] = row

        #
        ## make decision based on the variance of weights
        #
        mnw, mxw = cnt.minmax()
        if (not good) and (not ignore):  ## small variance?
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))

        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare: compare(hdata0, hmc0, address)

    active = tuple([p[0] for p in save_to_db])
    nactive = len(active)

    ## choose the effective exponent used to dilute the weights
    if power and callable(power):
        eff_exp = power(nactive)
    elif isinstance(power, num_types) and 0 < power <= 1.5:
        eff_exp = 1.0 * power
    elif 1 == nactive and 1 < len(plots):
        eff_exp = 0.95
    elif 1 == nactive:
        eff_exp = 1.00
    else:
        eff_exp = 1.10 / max(nactive, 1)

    ## store the new weights into the database
    ## NOTE(review): ``eff_exp *= ww'' below mutates eff_exp cumulatively
    ## across the popped entries — confirm this compounding is intended
    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        cnt = weight.stat()
        mnw, mxw = cnt.minmax()

        ## avoid too large or too small  weights
        for i in weight:
            w = weight[i]
            if w.value() < 0.5:
                weight[i] = VE(0.5, w.cov2())
            elif w.value() > 2.0:
                weight[i] = VE(2.0, w.cov2())

        if 1 < nactive and 1 != ww:
            eff_exp *= ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, eff_exp, address))

        if 1 != eff_exp and 0 < eff_exp:
            weight = weight**eff_exp
            row = list(rows[address])
            row.append('%4.3f' % eff_exp)
            rows[address] = tuple(row)

        with DBASE.open(database) as db:

            db[address] = db.get(address, []) + [weight]

            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        ## release large intermediate objects promptly
        del hd0, hm0, hd, hm, weight, entry

    ## print the summary table
    table = [header]
    for row in rows:
        table.append(rows[row])

    import ostap.logger.table as Table
    logger.info(
        '%s, active:#%d \n%s ' %
        (tag, nactive,
         Table.table(table, title=tag, prefix='# ', alignment='lccccccc')))

    cmp_plots = tuple(cmp_plots)
    return (active, cmp_plots) if make_plots else active
Example #8
0
    def __init__(
            self,
            dbase="weights.db",  ## the name of data base with the weights 
            factors=()):  ## NB: immutable default (was a mutable list)
        """Build the weighting object from the database of weights.
        - dbase   : name of the database with the weighting functions
        - factors : sequence of weighting variables with
                    accessor/address/merge/skip attributes
        """
        #
        ## make some statistic
        #
        self.__counter = SE()
        self.__nzeroes = 0

        self.__vars = []
        if not factors: return
        self.__dbase = dbase

        ## open database

        ## summary table: one row per weighting variable
        self.__table = [('Reweighting', 'accessor', '#', 'merged?', 'skip')]
        rows = []

        with DBASE.open(dbase, 'r') as db:  ## READONLY

            logger.debug('Reweigting database: \n%s' % db.table(prefix='# '))

            ## loop over the weighting factors and build the function
            for wvar in factors:

                funval = wvar.accessor  ## accessor to the variable
                funname = wvar.address  ## address  in database
                merge = wvar.merge  ## merge sequence of callables?
                skip = wvar.skip  ## skip   some of them?

                row = []

                row.append(funname)

                if isinstance(funval, str):
                    row.append(funval)
                    ## funval = operator.attrgetter( funval )
                    funval = AttrGetter(funval)
                elif isinstance(funval, AttrGetter):
                    atts = funval.attributes
                    if 1 == len(atts): atts = atts[0]
                    row.append(str(atts))
                else:
                    row.append('')

                ##
                functions = db.get(funname, [])  ## db[ funname ]
                if not functions:
                    logger.warning(
                        "No reweighting is available for ``%s'', skip it" %
                        funname)
                    continue

                if not isinstance(functions, (list, tuple)):
                    functions = [functions]

                flen = len(functions)
                if 0 < skip and skip < flen:
                    logger.info("Use only %d first iterations for ``%s''" %
                                (skip, funname))
                    functions = functions[:skip]
                elif 0 > skip and abs(skip) < flen:
                    ## FIX: report a positive count (skip is negative here)
                    logger.info("Skip last %d iterations for ``%s''" %
                                (abs(skip), funname))
                    functions = functions[:skip]
                elif 0 == skip:
                    pass
                else:
                    logger.error(
                        "Invalid ``skip'' parameter %s/%d for ``%s''" %
                        (skip, flen, funname))
                row.append('%d' % flen)

                ## nullify the uncertainties except for the last histogram
                _functions = []
                _first = True
                for f in reversed(functions):
                    if isinstance(f, ROOT.TH1) and _first:
                        ff = f.clone()
                        for i in ff:
                            v = float(ff[i])
                            ff[i] = VE(v, 0)
                        _functions.append(ff)
                        _first = False
                    else:
                        _functions.append(f)

                _functions.reverse()
                functions = _functions

                row.append('+' if merge else '-')
                row.append('%s' % skip)

                ## merge list of functions into single function
                if merge and 1 < len(functions):

                    ## single_func = functions[0] * functions [1]
                    single_func = MULT(functions[0], functions[1])

                    for fun in functions[2:]:

                        ## multiply it
                        ## single_func *= fun
                        single_func = MULT(single_func, fun)

                    functions = [single_func]

                self.__vars += [(funname, funval, functions, SE())]

                self.__table.append(row)

        self.__vars = tuple(self.__vars)
Example #9
0
def makeWeights(
        dataset,
        plots=(),  ## NB: immutable default (was a mutable list)
        database="weights.db",
        compare=None,  ## comparison function 
        delta=0.001,  ## delta for ``mean''  weight variation
        minmax=0.05,  ## delta for ``minmax'' weight variation
        power=0,  ## auto-determination
        debug=True,  ## save intermediate information in DB
        tag="Reweighting"):
    """Perform one reweighting iteration:
    project the (MC) dataset for each plot, compare with the reference
    ``data'' histogram, build the weight and update the database.
    Returns the list of addresses of still-``active'' reweightings.
    """
    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    ## auto-determine the ``power'' from the number of plots unless specified
    power = power if power >= 1 else len(plots)

    nplots = len(plots)
    ## several simultaneous reweightings: relax the convergence criteria
    if 1 < nplots:
        import math
        fudge_factor = math.sqrt(1.0 / max(2.0, nplots - 1.0))
        delta = delta * fudge_factor
        minmax = minmax * fudge_factor

    save_to_db = []
    ## number of active plots for reweighting
    for wplot in plots:

        what = wplot.what  ## variable/function to plot/compare
        how = wplot.how  ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data  ## original "DATA" object
        hmc0 = wplot.mc_histo  ## original "MC"   histogram
        ww = wplot.w  ## relative weight
        #
        # normailze the data
        #
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1): hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        dataset.project(hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("Reweighting: statistic goes to zero %s/``%s''" %
                           (st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate  the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible
        if   isinstance ( hmc   , ( ROOT.TH1F , ROOT.TH1D ) ) and \
           isinstance ( hdata , ( ROOT.TH1F , ROOT.TH1D ) )   and \
           len ( hmc ) >= len( hdata )                        :
            w = (1.0 / hmc) * hdata  ## NB!
            ## elif isinstance ( hmc   , ( ROOT.TH2F , ROOT.TH2D ) ) and \
            ##    isinstance ( hdata , ( ROOT.TH2F , ROOT.TH2D ) )   and \
            ##    len ( hmc.GetXaxis() ) >= len( hdata.GetXaxis () ) and \
            ##    len ( hmc.GetYaxis() ) >= len( hdata.GetYaxis () ) : w = ( 1.0 / hmc ) * hdata ## NB!
            ## elif isinstance ( hmc   , ( ROOT.TH3F , ROOT.TH3D ) ) and \
            ##    isinstance ( hdata , ( ROOT.TH3F , ROOT.TH3D ) )   and \
            ##    len ( hmc.GetXaxis() ) >= len( hdata.GetXaxis () ) and \
            ##    len ( hmc.GetYaxis() ) >= len( hdata.GetYaxis () ) and \
            ##    len ( hmc.GetZaxis() ) >= len( hdata.GetZaxis () ) : w = ( 1.0 / hmc ) * hdata ## NB!
        else:
            w = hdata / hmc  ## NB!

        # =====================================================================
        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?
        #
        afunc1 = allright if good1 else attention
        afunc2 = allright if good2 else attention
        #
        message = "%s: %24s:" % (tag, address)
        message += ' ' + 'mean=%12s' % cnt.mean().toString('(%4.2f+-%4.2f)')
        message += ' ' + afunc2('min/max=%-5.3f/%5.3f' %
                                (cnt.minmax()[0], cnt.minmax()[1]))
        message += ' ' + afunc1('rms=%s[%%]' %
                                (wvar * 100).toString('(%4.2f+-%4.2f)'))
        logger.info(message)
        #
        ## make decision based on the variance of weights
        #
        mnw, mxw = cnt.minmax()
        if good:  ## small variance?
            message = "%s: No more reweights for %s" % (tag, address)
            message += ' ' + allright("min/max/rms=%+3.1f/%+3.1f/%3.1f[%%]" %
                                      ((mnw - 1) * 100,
                                       (mxw - 1) * 100, 100 * wvar))
            logger.info(message)
            del w, hdata, hmc
        else:
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))
        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare: compare(hdata0, hmc0, address)

    ## for single reweighting
    ## if 1 == nplots : power = 1

    ## if power != nplots :
    #    logger.info ( "%s: ``power'' is %g/#%d"  % ( tag , power , nplots  ) )

    active = [p[0] for p in save_to_db]
    ## colorize/mark the active reweightings in the printout
    ## (local renamed from ``all'' -- it shadowed the builtin)
    labels = [p.address for p in plots]
    for i, a in enumerate(labels):
        if a in active:
            if isatty(): labels[i] = attention(a)
            else: labels[i] = '*' + a + '*'
        else:
            if isatty(): labels[i] = allright(a)

    logger.info("%s: reweights are: %s" % (tag, (', '.join(labels))))

    ## if len ( active ) != nplots :
    ##    if database and save_to_db :
    ##        power += ( nplots - len ( active ) )
    ##        logger.info  ("%s: ``power'' is changed to %g" %  ( tag , power ) )

    nactive = len(active)
    ## store the new weights into the database
    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        ## eff_exp = 1.0  / power
        ## eff_exp = 0.95 / ( 1.0 * nactive ) ** 0.5

        cnt = weight.stat()
        mnw, mxw = cnt.minmax()

        ## choose the effective exponent from the spread of the weights:
        ## small spread -> dilute less (or boost for a single reweighting)
        if 0.95 < mnw and mxw < 1.05:
            eff_exp = 0.75 if 1 < nactive else 1.50
        elif 0.90 < mnw and mxw < 1.10:
            eff_exp = 0.70 if 1 < nactive else 1.30
        elif 0.80 < mnw and mxw < 1.20:
            eff_exp = 0.65 if 1 < nactive else 1.25
        elif 0.70 < mnw and mxw < 1.30:
            eff_exp = 0.60 if 1 < nactive else 1.15
        elif 0.50 < mnw and mxw < 1.50:
            eff_exp = 0.55 if 1 < nactive else 1.10
        else:
            eff_exp = 0.50 if 1 < nactive else 1.0

        ## print 'effective exponent is:', eff_exp , address , mnw , mxw , (1.0/mnw)*mnw**eff_exp , (1.0/mxw)*mxw**eff_exp

        if 1 < nactive and 1 != ww:
            eff_exp *= ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, eff_exp, address))

        if 1 != eff_exp and 0 < eff_exp:
            weight = weight**eff_exp

        ## print 'WEIGHT stat', eff_exp, weight.stat()

        ## hmmmm... needed ? yes!
        #if 1 < power : weight = weight ** ( 1.0 / power )

        ## relative importance
        #if 1 != ww :
        #    logger.info  ("%s: apply ``relative importance factor'' of %.3g for ``'%s'" % ( tag , ww , address ) )
        #    weight = weight ** ww

        with DBASE.open(database) as db:

            db[address] = db.get(address, []) + [weight]

            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        ## release large intermediate objects promptly
        del hd0, hm0, hd, hm, weight, entry

    return active
Example #10
0
def test_shelves():
    """Exercise the shelve-like database backends (SQLite/zip/bz2/lzma/ROOT):

    - store the module-level ``data`` dictionary into each backend
    - reopen the databases read-only and compare the stored 1D/2D histograms
      against the module-level originals ``h1``/``h2``
    - clone every database and finally write into temporary databases

    Relies on module-level fixtures: ``data``, ``h1``, ``h2``, ``CU``,
    ``dbsize``, ``timing``, ``iszero``, ``logger`` and the *shelve modules
    (``lzshelve`` may be ``None`` when lzma support is unavailable).
    """
    ## temporary file names (cleaned up automatically by CU.CleanUp)
    db_sql_name  = CU.CleanUp.tempfile ( suffix = '.sqldb'  )  
    db_zip_name  = CU.CleanUp.tempfile ( suffix = '.zipdb'  ) 
    db_bz2_name  = CU.CleanUp.tempfile ( suffix = '.bz2db'  )
    db_root_name = CU.CleanUp.tempfile ( suffix = '.root'   )
    db_lz_name   = CU.CleanUp.tempfile ( suffix = '.lzmadb' )

    ## create ('c') the databases
    db_sql  = sqliteshelve.open ( db_sql_name  , 'c' )
    db_zip  = zipshelve.open    ( db_zip_name  , 'c' )
    db_bz2  = bz2shelve.open    ( db_bz2_name  , 'c' )
    db_root = rootshelve.open   ( db_root_name , 'c' )
    
    ## lzma support is optional
    if lzshelve : db_lz = lzshelve.open ( db_lz_name , 'c' )
    else        : db_lz = None  ## FIX: was a typo 'db_ls', leaving 'db_lz' unbound
        
    ## store every item in all backends
    for k in data :
        db_sql  [ k ] = data[k]
        db_zip  [ k ] = data[k]
        db_bz2  [ k ] = data[k]
        if lzshelve :
            db_lz  [ k ] = data[k]
        db_root [ k ] = data[k]
        
    ## report the number of stored keys per backend
    logger.info('SQLiteShelve #keys: %s' % len ( list ( db_sql .keys() ) ) ) 
    logger.info('ZipShelve    #keys: %s' % len ( db_zip .keys() ) )
    logger.info('Bz2Shelve    #keys: %s' % len ( db_bz2 .keys() ) )
    logger.info('RootShelve   #keys: %s' % len ( db_root.keys() ) )
    if lzshelve :
        logger.info('LzShelve     #keys: %s' % len ( db_lz .keys() ) )

    ## flush everything to disk
    db_sql  .close() 
    db_zip  .close()
    db_bz2  .close()
    db_root .close()
    if lzshelve : db_lz .close()

    ## report on-disk sizes (dbsize returns a pair for the '%d|%d' format)
    logger.info('SQLiteShelve size: %d|%d ' % dbsize ( db_sql_name  ) ) 
    logger.info('ZipShelve    size: %d|%d ' % dbsize ( db_zip_name  ) )   
    logger.info('Bz2Shelve    size: %d|%d ' % dbsize ( db_bz2_name  ) ) 
    logger.info('RootShelve   size: %d|%d'  % dbsize ( db_root_name ) )  
    if lzshelve :
        logger.info('LzShelve     size: %d|%d ' % dbsize ( db_lz_name    ) ) 
    
    ## reopen everything read-only ('r')
    db_sql  = sqliteshelve.open    ( db_sql_name  , 'r' )
    db_zip  = zipshelve.open       ( db_zip_name  , 'r' )
    db_bz2  = bz2shelve.open       ( db_bz2_name  , 'r' )
    if lzshelve :
        db_lz  = lzshelve.open     ( db_lz_name   , 'r' )
    db_root = rootshelve.open      ( db_root_name , 'r' )

    logger.info('SQLiteShelve #keys: %s' % len ( list ( db_sql .keys() ) ) ) 
    logger.info('ZipShelve    #keys: %s' % len ( db_zip .keys() ) )
    logger.info('Bz2Shelve    #keys: %s' % len ( db_bz2 .keys() ) )
    if lzshelve :
        logger.info('LzShelve     #keys: %s' % len ( db_lz  .keys() ) )
    logger.info('RootShelve   #keys: %s' % len ( db_root.keys() ) )

    ## timed retrieval of the stored objects from each backend
    with timing ( 'h2-read/SQL'  ) : h2_sql  = db_sql  [ 'histo-2D']
    with timing ( 'h2_read/ZIP'  ) : h2_zip  = db_zip  [ 'histo-2D']
    with timing ( 'h2_read/BZ2'  ) : h2_bz2  = db_bz2  [ 'histo-2D']
    if lzshelve :
        with timing ( 'h2_read/LZ'  ) :
            h2_lz = db_lz  [ 'histo-2D']
    with timing ( 'h2_read/ROOT' ) : h2_root = db_root [ 'histo-2D']

    with timing ( 'tu-read/SQL'  ) : tu_sql  = db_sql  [ 'both'    ]
    with timing ( 'tu_read/ZIP'  ) : tu_zip  = db_zip  [ 'both'    ] 
    with timing ( 'tu_read/BZ2'  ) : tu_bz2  = db_bz2  [ 'both'    ] 
    if lzshelve :
        with timing ( 'tu_read/LZ'   ) :
            tu_lz   = db_lz   [ 'both'    ] 
    with timing ( 'tu_read/ROOT' ) : tu_root = db_root [ 'both'    ]

    with timing ( 'h1-read/SQL'  ) : h1_sql  = db_sql  [ 'histo-1D']
    with timing ( 'h1-read/ZIP'  ) : h1_zip  = db_zip  [ 'histo-1D']
    with timing ( 'h1-read/BZ2'  ) : h1_bz2  = db_bz2  [ 'histo-1D']
    if lzshelve : 
        with timing ( 'h1-read/LZ'   ) :
            h1_lz   = db_lz   [ 'histo-1D']
    with timing ( 'h1-read/ROOT' ) : h1_root = db_root [ 'histo-1D']

    ## bin-by-bin comparison of the retrieved 1D histograms with the original h1
    for i in h1_sql : 
        v = h1_sql  [i] - h1_zip [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(1)!')
        v = h1_sql  [i] - h1     [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(2)!')
        v = h1_root [i] - h1     [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(3)!')
        v = h1_bz2  [i] - h1     [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 1D histogram(4)!')
        if lzshelve :
            v = h1_lz  [i] - h1     [i] 
            if not iszero ( v.value() ) :
                logger.error('Large difference for 1D histogram(5)!')
                
    ## bin-by-bin comparison of the retrieved 2D histograms with the original h2
    for i in h2_sql : 
        v = h2_sql  [i] - h2_zip[i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(1)!')
        v = h2_sql  [i] - h2    [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(2)!')
        v = h2_root [i] - h2    [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(3)!')
        v = h2_bz2  [i] - h2    [i] 
        if not iszero ( v.value() ) :
            logger.error('Large difference for 2D histogram(4)!')
        if lzshelve :
            v = h2_lz  [i] - h2    [i] 
            if not iszero ( v.value() ) :
                logger.error('Large difference for 2D histogram(5)!')
            
    ## sanity access to the stored tuple element
    h1tq = tu_sql [1]
    h1tz = tu_zip [1]
    h1tr = tu_root[1]

    ## clone them 
    dbs = [ db_sql , db_zip , db_bz2 , db_root ]
    if lzshelve : dbs.append ( db_lz )
    
    for db in dbs :
        cdb = db.clone ( CU.CleanUp.tempfile ( suffix = '.db'  ) )
        logger.info('Cloned:')
        cdb.ls()
    del dbs 
        
    ## timed close of every backend
    with timing('Close SQL'  ) : db_sql .close() 
    with timing('Close ZIP'  ) : db_zip .close()
    with timing('Close BZ2'  ) : db_bz2 .close()
    if lzshelve : 
        with timing('Close LZ'   ) : db_lz  .close()
    with timing('Close ROOT' ) : db_root.close()

    ## finally, the same exercise with temporary databases
    for dbase in ( sqliteshelve . tmpdb () ,
                   zipshelve    . tmpdb () ,
                   bz2shelve    . tmpdb () ,
                   ## lzshelve     . tmpdb () ,
                   rootshelve   . tmpdb () ) :

        with timing () :
            
            with dbase as db :
                
                db [ 'h1'    ] = h1
                db [ 'h2'    ] = h2
                db [ 'data'  ] = data
                db [ 'histos'] = data['histos']
                db.ls()
Example #11
0
def test_shelves():
    """Round-trip test for the SQLite/zip/ROOT shelve backends: store the
    module-level ``data`` dictionary, reopen the databases read-only,
    compare the retrieved histograms against the module-level originals
    ``h1``/``h2``, then repeat the storage exercise with temporary databases.
    """
    ## create the three databases
    sql_db  = sqliteshelve.open ( db_sql_name  , 'c' )
    zip_db  = zipshelve.open    ( db_zip_name  , 'c' )
    root_db = rootshelve.open   ( db_root_name , 'c' )

    ## store every item in all three backends
    for key in data :
        payload = data [ key ]
        sql_db  [ key ] = payload
        zip_db  [ key ] = payload
        root_db [ key ] = payload

    ## report the stored keys per backend
    for message , handle in ( ( 'SQLiteShelve keys: %s' , sql_db  ) ,
                              ( 'ZipShelve    keys: %s' , zip_db  ) ,
                              ( 'RootShelve   keys: %s' , root_db ) ) :
        logger.info ( message % list ( handle.keys () ) )

    ## flush everything to disk
    for handle in ( sql_db , zip_db , root_db ) : handle.close ()

    ## report on-disk sizes
    for message , path in ( ( 'SQLiteShelve size: %d ' , db_sql_name  ) ,
                            ( 'ZipShelve    size: %d ' , db_zip_name  ) ,
                            ( 'RootShelve   size: %d ' , db_root_name ) ) :
        logger.info ( message % os.path.getsize ( path ) )

    ## reopen everything read-only
    sql_db  = sqliteshelve.open ( db_sql_name  , 'r' )
    zip_db  = zipshelve.open    ( db_zip_name  , 'r' )
    root_db = rootshelve.open   ( db_root_name , 'r' )

    for message , handle in ( ( 'SQLiteShelve keys: %s' , sql_db  ) ,
                              ( 'ZipShelve    keys: %s' , zip_db  ) ,
                              ( 'RootShelve   keys: %s' , root_db ) ) :
        logger.info ( message % list ( handle.keys () ) )

    ## timed retrieval of the stored objects
    with timing ( 'h2-read/SQL'  ) : sql_h2  = sql_db  [ 'histo-2D' ]
    with timing ( 'h2_read/ZIP'  ) : zip_h2  = zip_db  [ 'histo-2D' ]
    with timing ( 'h2_read/ROOT' ) : root_h2 = root_db [ 'histo-2D' ]

    with timing ( 'tu-read/SQL'  ) : sql_tu  = sql_db  [ 'both' ]
    with timing ( 'tu_read/ZIP'  ) : zip_tu  = zip_db  [ 'both' ]
    with timing ( 'tu_read/ROOT' ) : root_tu = root_db [ 'both' ]

    with timing ( 'h1-read/SQL'  ) : sql_h1  = sql_db  [ 'histo-1D' ]
    with timing ( 'h1-read/ZIP'  ) : zip_h1  = zip_db  [ 'histo-1D' ]
    with timing ( 'h1-read/ROOT' ) : root_h1 = root_db [ 'histo-1D' ]

    ## bin-by-bin comparison of the 1D histograms against the original h1
    for idx in sql_h1 :
        for num , pair in enumerate ( ( ( sql_h1  [ idx ] , zip_h1 [ idx ] ) ,
                                        ( sql_h1  [ idx ] , h1     [ idx ] ) ,
                                        ( root_h1 [ idx ] , h1     [ idx ] ) ) ,
                                      start = 1 ) :
            diff = pair [ 0 ] - pair [ 1 ]
            if not iszero ( diff.value () ) :
                logger.error ( 'Large difference for 1D histogram(%d)!' % num )

    ## bin-by-bin comparison of the 2D histograms against the original h2
    for idx in sql_h2 :
        for num , pair in enumerate ( ( ( sql_h2  [ idx ] , zip_h2 [ idx ] ) ,
                                        ( sql_h2  [ idx ] , h2     [ idx ] ) ,
                                        ( root_h2 [ idx ] , h2     [ idx ] ) ) ,
                                      start = 1 ) :
            diff = pair [ 0 ] - pair [ 1 ]
            if not iszero ( diff.value () ) :
                logger.error ( 'Large difference for 2D histogram(%d)!' % num )

    ## sanity access to the stored tuple element
    h1tq = sql_tu  [ 1 ]
    h1tz = zip_tu  [ 1 ]
    h1tr = root_tu [ 1 ]

    ## timed close of every backend
    for label , handle in ( ( 'Close SQL'  , sql_db  ) ,
                            ( 'Close ZIP'  , zip_db  ) ,
                            ( 'Close ROOT' , root_db ) ) :
        with timing ( label ) : handle.close ()

    ## timed removal of the database files
    for label , path in ( ( 'Remove SQL'  , db_sql_name  ) ,
                          ( 'Remove ZIP'  , db_zip_name  ) ,
                          ( 'Remove ROOT' , db_root_name ) ) :
        with timing ( label ) : os.remove ( path )

    ## the same storage exercise with temporary databases
    for tmp in ( sqliteshelve.tmpdb () ,
                 zipshelve   .tmpdb () ,
                 rootshelve  .tmpdb () ) :

        with tmp as db :

            db [ 'h1' ] = h1
            db [ 'h2' ] = h2
            db.ls ()
Example #12
0
def makeWeights  ( dataset                 ,    ## (MC) dataset with a 'project' method
                   plots    = ()           ,    ## plot specs: (what, how, address, hdata [, hmc])
                   database = "weights.db" ,    ## weights database to update
                   compare  = None         ,    ## comparison function 
                   delta    = 0.001        ,    ## delta for weight variance 
                   debug    = True         ) :  ## save intermediate information in DB 
    """Perform one iteration of the iterative reweighting procedure.

    For each entry in ``plots`` the MC ``dataset`` is projected, the
    data/MC ratio is turned into a (damped) weight object and appended to
    ``database`` under the entry's address.  When ``compare`` is given it
    is invoked as ``compare(hdata0, hmc0, address)`` for monitoring.

    Returns ``True`` when at least one entry still has a weight variance
    above ``delta/len(plots)`` and further iterations are needed.
    """
    more = False

    ## calculate the reweighting damping factor: a bit conservative (?)
    ## NB: loop-invariant, hoisted out of the loop
    power = min ( 2.0 , len ( plots ) )                   ## NB!

    ## loop over plots 
    for r in plots  :

        what    = r [0]         ## variable/function to plot/compare 
        how     = r [1]         ## weight or additional cuts 
        address = r [2]         ## address in database 
        hdata0  = r [3]                          .clone () ## original "DATA" histogram
        hmc0    = r [4] if 4 < len(r) else hdata0.clone () ## original "MC"   histogram 

        #
        ## black magic to take into account the difference in bins and normalizations
        #
        hdata = hdata0 
        if hasattr ( hdata , 'rescale_bins' ) : 
            hdata = hdata.rescale_bins ( 1.0   )
            
        ## normalize the data:
        hmean = None 
        if hasattr ( hdata , 'mean' ) and hasattr ( hdata , '__idiv__' ) :

            ## normalization point
            hmean  = hdata.mean()
            #  2D means are coordinate pairs and must be unpacked
            if isinstance ( hdata , ROOT.TH2 ) : hdata /= hdata ( *hmean )
            else                               : hdata /= hdata (  hmean )

        #
        ## make a plot on (MC) data with the weight
        # 
        dataset.project ( hmc0 , what , how )

        st   = hmc0.stat()
        mnmx = st.minmax()
        if iszero ( mnmx[0] ) :
            logger.warning ( 'Statistic goes to zero %s/"%s"' % ( st , address ) ) 
            
        #
        ## black magic to take into account the difference in bins and normalizations
        # 
        hmc = hmc0.rescale_bins ( 1.0 )
        
        ## normalize MC at the same point as data
        if hmean is None : pass
        else             :
            if isinstance ( hmc , ROOT.TH2 ) : hmc /= hmc ( *hmean )
            else                             : hmc /= hmc (  hmean )

        #  this is the only important line:
        #  try to exploit finer binning if possible 
        if len ( hmc ) >= len( hdata )  : 
            w     = ( ( 1.0   / hmc ) * hdata ) ** ( 1.0 / power )  ## NB!
        else :
            w     = ( ( hdata / hmc )         ) ** ( 1.0 / power )  ## NB!
            
        #
        ## get the statistics of weights 
        #
        cnt  = w.stat()
        mnmx = cnt.minmax()
        ## re-center the weights around unity when 1 is outside [min,max]
        if not mnmx [0] <= 1 <= mnmx[1] : w /= cnt.mean().value()
        cnt  = w.stat()
        #
        ## relative spread of the weights 
        wvar = cnt.rms()/cnt.mean()
        logger.info ( 'Reweighting "%-.15s: Mean/minmax:%s/(%.4f,%.4f) Vars:%s[%%]' %
                      ( address         ,
                        cnt.mean()      ,
                        cnt.minmax()[0] ,
                        cnt.minmax()[1] , wvar * 100 ) ) 
        #
        ## make decision based on variance of weights 
        #
        if wvar.value() <= delta / len ( plots ) : ## small variance? 
            save = False
            logger.info("No more reweighting for %s [%.3f%%]" %  ( address , wvar * 100 ) ) 
        else            :
            save = True 

        #
        ## make a comparison (if needed)
        # 
        if compare :
            compare ( hdata0 , hmc0 , address )
        
        ## update data base 
        if save and database and address :
            with DBASE.open ( database ) as db :

                db[address] = db.get( address , [] ) + [ w ]
                
                if debug :
                    addr        = address + ':REWEIGHTING'
                    entry       = ( hdata0 , hmc0 , hdata , hmc , w ) 
                    db [ addr ] = db.get ( addr , [] ) + [ entry ]
                    
        ## at least one plot still needs reweighting?
        more = more or save

        ## release the temporaries before the next iteration 
        del hdata0, hmc0, hdata, hmc, w  
        
    return more