def test_summary2():
    """Draw a summary plot built from two `Limit` and three `Record` entries.

    The entries are handed to `draw_summary` with the axis range [0, 8];
    afterwards the pad axes are redrawn and the function sleeps for
    three seconds.
    """

    # drawing options shared by all entries (optionally also 'line_width': 2)
    common = dict(label_position=5.6, markersize=1.2, text_size=0.07)

    entries = [
        Limit(2.1, label='Belle', **common),
        Record(VE(3.4, 1.4**2), label='BaBar', **common),
        Record(VE(2.46, 0.64**2), 0.29, label='LHCb', color=2, **common),
        Record(2.18, (0.81, -0.77), label="Global fit", **common),
        Limit(0.59, label='BESIII', **common),
    ]

    # adjust the global style only when it is available
    style = ROOT.gStyle
    if style:
        style.SetEndErrorSize(5)
        style.SetTickLength(0.008)

    # keep the returned object referenced until the function returns
    result = draw_summary(entries, vmin=0, vmax=8)

    if ROOT.gPad:
        ROOT.gPad.RedrawAxis()

    time.sleep(3)
def test_summary3():
    """Draw a long summary plot mixing `Point`, `Interval`, `Limit` and `Record`.

    The entries carry no labels; they are handed to `draw_summary` with the
    axis range [-300, 300]. Afterwards the pad axes are redrawn and the
    function sleeps for three seconds.
    """

    # drawing options shared by all entries (optionally also 'line_width': 2)
    common = dict(label_position=5.6, markersize=1.2, text_size=0.07)

    entries = [
        Point(0, **common),
        Point(+19, **common),
        Interval(-1, +19, **common),
        Limit(0, -40, arrow_size=0.01, arrow_style='|-|>', **common),
        Point(+35, **common),
        Point(+52, **common),
        Interval(-3, -1, **common),
        Point(-15, **common),
        Point(+91, **common),
        Point(+60, **common),
        Point(-79, **common),
        Point(+88, **common),
        Point(-215, **common),
        Interval(-70, +124, **common),
        Point(+102, **common),
        Record(VE(7, 12**2), **common),
        Limit(0, -40, arrow_size=0.01, arrow_style='|-|>', **common),
        Point(+100, **common),
        Record(VE(25, 90**2), **common),
        Point(-150, **common),
        Point(0, **common),
        Point(-23, **common),
        Point(+98, **common),
        Point(-149, **common),
        Record(-3, (+4, -15), **common),
        Point(+53, **common),
        Point(+166, **common),
        Point(+260, **common),
        Point(-182, **common),
        Interval(-250, +2, **common),
        Point(+13, **common),
        Point(+164, **common),
    ]

    # adjust the global style only when it is available
    style = ROOT.gStyle
    if style:
        style.SetEndErrorSize(5)
        style.SetTickLength(0.008)

    # keep the returned object referenced until the function returns
    result = draw_summary(entries, vmin=-300, vmax=300)

    if ROOT.gPad:
        ROOT.gPad.RedrawAxis()

    time.sleep(3)
Exemple #3
0
def test_summary1 () :
    """Draw a summary plot of several `Record` entries together with an `Average`.

    The same `Average` object is placed into the list of entries and is also
    passed to `draw_summary` through the `average` argument. The plot uses the
    axis range [3860, 3877] and `offset = 1.0`. Afterwards the pad axes are
    redrawn and the function sleeps for three seconds.
    """

    # drawing options shared by all entries (optionally also 'line_width': 2)
    common = dict(label_position=3861, markersize=1.2, text_size=0.05)

    average = Average(VE(3871.64, 0.060**2), label='New average', **common)

    entries = [
        Record(3871.8, 3.1, 3.0, label='D0', **common),
        Record(3873.0, (+1.8, -1.6), 1.3, label='BaBar', **common),
        Record(3868.7, 1.5, 0.4, label='BaBar-K^{0}', **common),
        Record(3871.4, 0.6, 0.1, label='BaBar-K^{+}', **common),
        Record(3871.9, 0.7, 0.2, label='BESIII', **common),
        Record(3871.95, 0.48, 0.12, label="LHCb'2010", **common),
        Record(3871.85, 0.27, 0.19, label='Belle', **common),
        Record(3871.61, 0.16, 0.19, label='CDF', **common),
        Record(3871.69, 0.17, label="PDG\'2018", **common),
        Record(3871.70, 0.067, 0.068, label="LHCb'2020", color=ROOT.kRed, **common),
        Record(3871.59, 0.060, 0.030, label="LHCb'2020", color=ROOT.kRed, **common),
        average,
        Record(3871.70, 0.11, label='m_{D^{0}}#plusm_{D^{*0}}', **common),
    ]

    # adjust the global style only when it is available
    style = ROOT.gStyle
    if style:
        style.SetEndErrorSize(5)
        style.SetTickLength(0.008)

    # keep the returned object referenced until the function returns
    result = draw_summary(entries,
                          average=average,
                          vmin=3860,
                          vmax=3877,
                          offset=1.0)

    if ROOT.gPad:
        ROOT.gPad.RedrawAxis()

    time.sleep(3)
Exemple #4
0
    def __call__ ( self , s ) :
        """Calculate the weight for the given ``event'' (== record in TTree/TChain or RooDataSet):
        >>> weight = Weight ( ... )
        >>> tree   = ...
        >>> w = weight ( tree )

        For every registered variable the accessor is evaluated on `s`, all
        functions of that variable are applied to the result and multiplied
        together; the product over all variables is the total weight.
        The per-variable counters, the global counter and the count of zero
        weights are updated; the value of the total weight is returned.
        """

        ## the total weight, accumulated over all variables
        total = VE ( 1 , 0 )

        for item in self.__vars :

            accessor  = item [ 1 ]  ## accessor
            functions = item [ 2 ]  ## the functions

            ## get the weight arguments for the given event
            value = accessor ( s )

            ## a tuple is unpacked into positional arguments,
            ## anything else is passed as a single argument
            args = value if isinstance ( value , tuple ) else ( value , )

            ## the weight factor for this variable
            factor = VE ( 1.0 )
            for fun in functions :
                factor *= fun ( *args )

            ## keep the statistics
            counter  = item [ 3 ]
            counter += factor.value()

            ## update the total weight
            total *= factor

        result = total.value()

        self.__counter += result
        if iszero ( result ) : self.__nzeroes += 1

        return result
Exemple #5
0
    def __init__ ( self                   ,
                   dbase   = "weights.db" , ## the name of data base with the weights 
                   factors = []           ) :
        """Build the weighting functions from the content of the database.

        - dbase   : the name of the database with the weights
        - factors : sequence of objects providing the attributes ``accessor'',
                    ``address'', ``merge'' and ``skip''

        For each factor the entry stored in the database under ``address''
        is loaded, truncated according to ``skip'' and, if ``merge'' is set,
        combined into one callable with ``MULT''. The result is stored
        together with the address, the accessor and a fresh ``SE'' counter.
        Factors with no entry in the database are skipped with a warning.

        ``skip'' semantics:
        - skip > 0 : use only the first ``skip'' stored iterations
        - skip < 0 : drop the last ``|skip|'' stored iterations
        - skip = 0 : use everything
        """

        #
        ## make some statistic
        #
        self.__counter = SE ()
        self.__nzeroes = 0 

        self.__vars    = [] 
        if not factors : return

        ## open database 
        with DBASE.open ( dbase , 'r' ) as db : ## READONLY
            
            ## debug printout: the size of every sized entry in the database 
            for k in db :
                e = db[k]
                if hasattr ( e , '__len__' ) :  
                    logger.debug( "DBASE ``%.15s'' key ``%.15s'' #%d" % ( dbase ,  k, len( e ) ) ) 
                
            ## loop over the weighting factors and build the function
            for wvar in factors :

                funval  = wvar.accessor  ## accessor to the variable 
                funname = wvar.address   ## address  in database 
                merge   = wvar.merge     ## merge sequence of callables?
                skip    = wvar.skip      ## skip   some of them?
                
                ## a string accessor is converted into an attribute getter 
                if isinstance ( funval , str ) :
                    funval = AttrGetter( funval ) 
                    
                functions  = db.get ( funname , [] ) 
                if not functions :
                    logger.warning("No reweighting is available for ``%s'', skip it" % funname )
                    continue
                                
                if not isinstance (  functions , ( list , tuple ) ) :
                    functions = [ functions ]                    
                
                flen = len(functions) 
                if   0 < skip and skip      < flen :
                    logger.info  ("Use only %d first iterations for ``%s''" % ( skip , funname ) )
                    functions = functions[:skip] 
                elif 0 > skip and abs(skip) < flen :
                    logger.info  ("Skip last %d iterations for ``%s''" % ( skip , funname ) )
                    ## ``skip'' is negative here: the plain slice drops the last |skip| items.
                    ## (the former ``[:-1*skip]'' kept the first |skip| items instead) 
                    functions = functions[:skip] 
                elif 0 == skip :
                    pass
                else :
                    ## the invalid value is only reported, all functions are used 
                    logger.error("Invalid ``skip'' parameter %s/%d for ``%s''" % ( skip , flen , funname ) )
                
                ## Walk the sequence backwards: the first TH1 met (i.e. the last
                ## histogram in the sequence) is replaced by a clone with zero
                ## uncertainties, all other items are kept untouched.
                ## NOTE(review): the original comment read "nullify the uncertainties
                ## except for the last histogram", which is the opposite of what
                ## the code does - confirm the intent.
                _functions = []
                _first     = True 
                for f in reversed ( functions ) :
                    if isinstance ( f , ROOT.TH1 ) and _first : 
                        ff = f.clone()
                        for i in ff :
                            v     = float ( ff[i] )
                            ff[i] = VE(v,0)
                        _functions.append ( ff  )                        
                        _first = False 
                    else :
                        _functions.append ( f  )
                        
                ## restore the original order 
                _functions.reverse() 
                functions = _functions
                    
                ## merge list of functions into single function 
                if merge and 1 < len ( functions)  : 
                            
                    single_func = MULT ( functions[0] , functions [1] )
                    
                    for fun in functions [2:] :
                        single_func = MULT ( single_func , fun )
                            
                    functions  = [ single_func ]
                    
                self.__vars += [ ( funname , funval , functions , SE() ) ]
                
        self.__vars = tuple ( self.__vars ) 
Exemple #6
0
def makeWeights(
        dataset,
        plots=[],
        database="weights.db",
        compare=None,  ## comparison function 
        delta=0.01,  ## delta for ``mean''  weight variation
        minmax=0.03,  ## delta for ``minmax'' weight variation
        power=None,  ## auto-determination
        debug=True,  ## save intermediate information in DB
        make_plots=False,  ## make plots 
        tag="Reweighting"):
    """The main function: perform one re-weighting iteration
    and reweight the ``MC'' dataset to look like the ``data'' (reference) dataset
    >>> results = makeWeights (
    ... dataset           , ## data source to be reweighted (DataSet, TTree, abstract source)
    ... plots             , ## reweighting plots
    ... database          , ## database to store/update reweighting results
    ... delta             , ## stopping criterion for `mean`    weight variation
    ... minmax            , ## stopping criterion for `min/max` weight variation
    ... power             , ## effective power to apply to the weights
    ... debug      = True , ## store debug information in database
    ... make_plots = True , ## produce useful comparison plots
    ... tag        = 'RW' ) ## tag for better printout

    If `make_plots = False`, it returns the tuple of active reweightings:
    >>> active        = makeWeights ( ... , make_plots = False , ... )

    Otherwise it also returns the tuple of comparison plots
    >>> active, cmp_plots = makeWeights ( ... , make_plots = True  , ... )
    >>> for item in  cmp_plots :
    ...    what    = item.what
    ...    hdata   = item.data
    ...    hmc     = item.mc
    ...    hweight = item.weight

    If no more reweighting iterations are required, <code>active</code> is an empty tuple

    - `compare`, if given, is called as `compare ( hdata0 , hmc0 , address )` for every plot
    - `power` can be a callable of the number of active reweightings, or a number
      in the range (0, 1.5]; otherwise the exponent is chosen automatically
    - each stored weight is raised to its own exponent: the common exponent,
      multiplied by the relative weight `w` of the plot when more than one
      reweighting is active
    """

    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    nplots = len(plots)

    ## list of plots to compare
    cmp_plots = []
    ## reweighting summary table
    header = ('Reweighting', 'wmin/wmax', 'OK?', 'wrms[%]', 'OK?', 'chi2/ndf',
              'ww', 'exp')

    rows = {}
    ## the entries that still need reweighting
    save_to_db = []

    for wplot in plots:

        what = wplot.what  ## variable/function to plot/compare
        how = wplot.how  ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data  ## original "DATA" object
        hmc0 = wplot.mc_histo  ## original "MC"   histogram
        ww = wplot.w  ## relative weight
        projector = wplot.projector  ## projector for MC data
        ignore = wplot.ignore  ## ignore for weight building?

        # =====================================================================
        ## normalize the data
        # =====================================================================
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1): hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        hmc0 = projector(dataset, hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("%s: statistic goes to zero %s/``%s''" %
                           (tag, st, address))
        elif mnmx[0] <= 0:
            logger.warning("%s: statistic is negative  %s/``%s''" %
                           (tag, st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate  the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible
        hboth = isinstance(hmc, ROOT.TH1) and isinstance(hdata, ROOT.TH1)

        if   hboth and 1 == hmc.dim () and 1 == hdata.dim () and \
               len ( hmc ) >= len( hdata ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 2 == hmc.dim () and 2 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 3 == hmc.dim () and 3 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) and \
                 ( hmc.binsz() >= hdata.binsz() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        else:
            w = hdata / hmc  ## NB!

        # =====================================================================
        ## scale & get the statistics of weights
        # =====================================================================
        w /= w.stat().mean().value()
        cnt = w.stat()

        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?

        ## chi2 of the weights with respect to unity
        c2ndf = 0
        for i in w:
            c2ndf += w[i].chi2(1.0)
        ## avoid the division by zero for a single-entry weight object
        ndf = len(w) - 1
        if 0 < ndf: c2ndf /= ndf

        ## build  the row in the summary table
        row = [
            address,
            '%-5.3f/%5.3f' % (mnw, mxw),
            allright('+') if good2 else attention('-'),
            (wvar * 100).toString('%6.2f+-%-6.2f'),
            allright('+') if good1 else attention('-'),
            '%6.2f' % c2ndf,
            '%4.3f' % ww if 1 != ww else '',
        ]

        ## make plots at the start of  each iteration?
        if make_plots:
            item = ComparisonPlot(what, hdata, hmc, w)
            cmp_plots.append(item)

        rows[address] = tuple(row)

        #
        ## make decision based on the variance of weights
        #
        if (not good) and (not ignore):  ## small variance?
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))

        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare: compare(hdata0, hmc0, address)

    active = tuple([p[0] for p in save_to_db])
    nactive = len(active)

    ## the common exponent to be applied to the weights
    if power and callable(power):
        eff_exp = power(nactive)
    elif isinstance(power, num_types) and 0 < power <= 1.5:
        eff_exp = 1.0 * power
    elif 1 == nactive and 1 < nplots:
        eff_exp = 0.95
    elif 1 == nactive:
        eff_exp = 1.00
    else:
        eff_exp = 1.10 / max(nactive, 1)

    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        ## avoid too large or too small  weights
        for i in weight:
            w = weight[i]
            if w.value() < 0.5:
                weight[i] = VE(0.5, w.cov2())
            elif w.value() > 2.0:
                weight[i] = VE(2.0, w.cov2())

        ## The exponent for this entry only. The common exponent must not be
        ## modified here, otherwise the relative weight of one plot would
        ## propagate into the exponents of all subsequent plots.
        exponent = eff_exp
        if 1 < nactive and 1 != ww:
            exponent = eff_exp * ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, exponent, address))

        if 1 != exponent and 0 < exponent:
            weight = weight**exponent
            rows[address] = tuple(list(rows[address]) + ['%4.3f' % exponent])

        with DBASE.open(database) as db:

            ## append the new weight to the already stored iterations
            db[address] = db.get(address, []) + [weight]

            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        del hd0, hm0, hd, hm, weight, entry

    table = [header] + [rows[key] for key in rows]

    import ostap.logger.table as Table
    logger.info(
        '%s, active:#%d \n%s ' %
        (tag, nactive,
         Table.table(table, title=tag, prefix='# ', alignment='lccccccc')))

    cmp_plots = tuple(cmp_plots)
    return (active, cmp_plots) if make_plots else active
Exemple #7
0
    def __init__(
            self,
            dbase="weights.db",  ## the name of data base with the weights 
            factors=[]):
        """Build the weighting functions from the content of the database.

        - dbase   : the name of the database with the weights
        - factors : sequence of objects providing the attributes ``accessor'',
                    ``address'', ``merge'' and ``skip''

        For each factor the entry stored in the database under ``address''
        is loaded, truncated according to ``skip'' and, if ``merge'' is set,
        combined into one callable with ``MULT''. The result is stored
        together with the address, the accessor and a fresh ``SE'' counter,
        and one row per used factor is added to the summary table.
        Factors with no entry in the database are skipped with a warning.

        ``skip'' semantics:
        - skip > 0 : use only the first ``skip'' stored iterations
        - skip < 0 : drop the last ``|skip|'' stored iterations
        - skip = 0 : use everything
        """

        #
        ## make some statistic
        #
        self.__counter = SE()
        self.__nzeroes = 0

        self.__vars = []

        ## Initialise these before the early return, so that every instance
        ## has them, also the one created without weighting factors
        self.__dbase = dbase
        self.__table = [('Reweighting', 'accessor', '#', 'merged?', 'skip')]

        if not factors: return

        ## open database
        with DBASE.open(dbase, 'r') as db:  ## READONLY

            logger.debug('Reweigting database: \n%s' % db.table(prefix='# '))

            ## loop over the weighting factors and build the function
            for wvar in factors:

                funval = wvar.accessor  ## accessor to the variable
                funname = wvar.address  ## address  in database
                merge = wvar.merge  ## merge sequence of callables?
                skip = wvar.skip  ## skip   some of them?

                ## the row of the summary table for this factor
                row = [funname]

                ## describe the accessor in the table;
                ## a string accessor is converted into an attribute getter
                if isinstance(funval, str):
                    row.append(funval)
                    funval = AttrGetter(funval)
                elif isinstance(funval, AttrGetter):
                    atts = funval.attributes
                    if 1 == len(atts): atts = atts[0]
                    row.append(str(atts))
                else:
                    row.append('')

                functions = db.get(funname, [])
                if not functions:
                    logger.warning(
                        "No reweighting is available for ``%s'', skip it" %
                        funname)
                    continue

                if not isinstance(functions, (list, tuple)):
                    functions = [functions]

                flen = len(functions)
                if 0 < skip and skip < flen:
                    logger.info("Use only %d first iterations for ``%s''" %
                                (skip, funname))
                    functions = functions[:skip]
                elif 0 > skip and abs(skip) < flen:
                    logger.info("Skip last %d iterations for ``%s''" %
                                (skip, funname))
                    ## ``skip'' is negative: the slice drops the last |skip| items
                    functions = functions[:skip]
                elif 0 == skip:
                    pass
                else:
                    ## the invalid value is only reported, all functions are used
                    logger.error(
                        "Invalid ``skip'' parameter %s/%d for ``%s''" %
                        (skip, flen, funname))

                ## the number of stored iterations (before ``skip'' is applied)
                row.append('%d' % flen)

                ## Walk the sequence backwards: the first TH1 met (i.e. the last
                ## histogram in the sequence) is replaced by a clone with zero
                ## uncertainties, all other items are kept untouched.
                ## NOTE(review): the original comment read "nullify the
                ## uncertainties except for the last histogram", which is the
                ## opposite of what the code does - confirm the intent.
                _functions = []
                _first = True
                for f in reversed(functions):
                    if isinstance(f, ROOT.TH1) and _first:
                        ff = f.clone()
                        for i in ff:
                            v = float(ff[i])
                            ff[i] = VE(v, 0)
                        _functions.append(ff)
                        _first = False
                    else:
                        _functions.append(f)

                ## restore the original order
                _functions.reverse()
                functions = _functions

                row.append('+' if merge else '-')
                row.append('%s' % skip)

                ## merge list of functions into single function
                if merge and 1 < len(functions):

                    single_func = MULT(functions[0], functions[1])

                    for fun in functions[2:]:
                        single_func = MULT(single_func, fun)

                    functions = [single_func]

                self.__vars += [(funname, funval, functions, SE())]

                self.__table.append(row)

        self.__vars = tuple(self.__vars)
    from ostap.utils.cleanup import CleanUp
    files = [
        CleanUp.tempfile(prefix='ostap-test-trees-addbranch-%d-' % i,
                         suffix='.root') for i in range(nfiles)
    ]

    for f in progress_bar(files):
        create_tree(f, nentries)
    return files


# =============================================================================
## let h2 be a weight histogram:
#  2D-histogram with 20 bins in [0,10] along x and 15 bins in [2,5] along y
h2 = ROOT.TH2D('h2', '', 20, 0, 10, 15, 2, 5)
## add the function of (x,y) to the histogram: the value is 10+x+y and the
#  second argument of VE is the square of 25% of that value
#  NOTE(review): relies on ``+='' with a callable being supported for
#  histograms elsewhere in the project - confirm
h2 += lambda x, y: VE(10 + x + y, (0.25 * (10 + x + y))**2)

## selection expression on the ``mass'' variable
cut = ROOT.TCut('abs(mass-3.1)<1*0.015')


# =============================================================================
## Add pseudoexperiments into TTree/TChain
def test_modify_initial_tree(NEXP=10):
    """Add pseudoexperiments into TTree/TChain

    - NEXP : the number of pseudoexperiments; in the visible part of the
             function it is used only for the log message
    """

    ## prepare the input: `prepare_data ( 1 , 100000 )`
    #  NOTE(review): presumably one file with 100000 entries - confirm
    #  against the definition of `prepare_data`
    files = prepare_data(1, 100000)

    logger.info('Add %s pseudoexepriments into TTree/TChain' % NEXP)

    logger.info('#files:    %s' % len(files))
Exemple #9
0
import ostap.utils.cleanup   as     CU
import ostap.io.zipshelve    as     zipshelve
import ostap.io.bz2shelve    as     bz2shelve
if  2 < python_version.major :
    import ostap.io.lzshelve as     lzshelve
else :
    lzshelve = None 
import ostap.io.sqliteshelve as     sqliteshelve
import ostap.io.rootshelve   as     rootshelve

# =============================================================================
 
## 1D-histogram filled with 100000 values drawn with m1.gauss()
bins = 1000
h1 = ROOT.TH1D('h1', '1D-histogram', bins, -5, 5)
h1.Sumw2()
m1 = VE(1, 2)
for i in range(100000):
    h1.Fill(m1.gauss())

## 2D-histogram filled with 100000 pairs of values drawn with m1.gauss()
bins = 50
h2 = ROOT.TH2D('h2', '2D-histogram', bins, -5, 5, bins, -5, 5)
h2.Sumw2()
for i in range(100000):
    h2.Fill(m1.gauss(), m1.gauss())

## the test data: histograms, a mixed tuple and a dictionary
data = {
    'histo-1D': h1,
    'histo-2D': h2,
    'both': (123, h1, {'a': 2}, h2, 'comment', ()),
    'histos': {},
}

## many small histograms, 200 gaussian entries in each
for i in range(5000):
    ht = 'histo#%d' % i
    hh = ROOT.TH1D(ht, '', 500, 0, 100)
    for j in range(200):
        hh.Fill(random.gauss(50, 10))
Exemple #10
0
import ostap.io.zipshelve as zipshelve
import ostap.io.sqliteshelve as sqliteshelve
import ostap.io.rootshelve as rootshelve

# =============================================================================

## names of the temporary database files
#  NOTE(review): `tempfile.mktemp` is documented as deprecated, since the
#  returned name can be taken by another process before the file is created
db_sql_name = tempfile.mktemp(suffix='.msql')
db_zip_name = tempfile.mktemp(suffix='.zdb')
db_root_name = tempfile.mktemp(suffix='.root')

## 1D-histogram filled with 100000 values drawn with m1.gauss()
bins = 1000
h1 = ROOT.TH1D('h1', '1D-histogram', bins, -5, 5)
h1.Sumw2()
m1 = VE(1, 2)
for i in range(100000):
    h1.Fill(m1.gauss())

## 2D-histogram filled with 100000 pairs of values drawn with m1.gauss()
bins = 50
h2 = ROOT.TH2D('h2', '2D-histogram', bins, -5, 5, bins, -5, 5)
h2.Sumw2()
for i in range(100000):
    h2.Fill(m1.gauss(), m1.gauss())

## the test data: both histograms and a mixed tuple
data = {
    'histo-1D': h1,
    'histo-2D': h2,
    'both': (123, h1, {'a': 2}, h2, 'comment'),
}


def test_shelves():