Example #1
0
def test_compare_gauss_vs_exponent () :
    """Run `compare` on every (h?g, h?e) pair.

    Each of h1g..h5g is paired with each of h1e..h5e; the 1-based
    positions of both objects are logged right before the comparison.
    """
    for ng , hg in enumerate ( ( h1g , h2g , h3g , h4g , h5g ) , start = 1 ) :
        for ne , he in enumerate ( ( h1e , h2e , h3e , h4e , h5e ) , start = 1 ) :
            message = 'Compare gaussian  (%d) and exponent    (%d)' % ( ng , ne )
            logger.info ( message )
            compare ( hg , he )
Example #2
0
def test_compare_uniform_vs_exponent () :
    """Run `compare` on every (h?u, h?e) pair.

    Each of h1u..h5u is paired with each of h1e..h5e; the 1-based
    positions of both objects are logged right before the comparison.
    """
    for nu , hu in enumerate ( ( h1u , h2u , h3u , h4u , h5u ) , start = 1 ) :
        for ne , he in enumerate ( ( h1e , h2e , h3e , h4e , h5e ) , start = 1 ) :
            message = 'Compare uniform   (%d) and exponent    (%d)' % ( nu , ne )
            logger.info ( message )
            compare ( hu , he )
Example #3
0
def test_compare_gauss_vs_uniform() :
    """Run `compare` on every (h?g, h?u) pair.

    Each of h1g..h5g is paired with each of h1u..h5u; the 1-based
    positions of both objects are logged right before the comparison.
    """
    for ng , hg in enumerate ( ( h1g , h2g , h3g , h4g , h5g ) , start = 1 ) :
        for nu , hu in enumerate ( ( h1u , h2u , h3u , h4u , h5u ) , start = 1 ) :
            message = 'Compare gaussian  (%d) and uniform     (%d)' % ( ng , nu )
            logger.info ( message )
            compare ( hg , hu )
Example #4
0
def makeWeights(
        dataset,
        plots=(),
        database="weights.db",
        compare=None,  ## comparison function
        delta=0.01,  ## threshold for the ``mean''   weight variation
        minmax=0.03,  ## threshold for the ``minmax'' weight variation
        power=None,  ## auto-determination
        debug=True,  ## save intermediate information in DB
        make_plots=False,  ## make plots
        tag="Reweighting"):
    """Perform one re-weighting iteration of the `MC` dataset towards `data`.

    For every entry of `plots` the MC distribution is obtained with the
    entry's projector, both distributions are normalised, and the ratio
    data/MC (scaled to unit mean) becomes the candidate weight.  A plot is
    `active` when its weight is not yet flat (see `delta` and `minmax`) and
    it is not flagged as ignored.  For every active plot the weight is
    clamped to [0.5, 2.0], raised to an effective exponent and appended to
    the list stored in `database` under the address of the plot.
    A summary table is logged at the end.

    >>> results = makeWeights (
    ... dataset                 , ## data source to be reweighted
    ... plots                   , ## reweighting plots
    ... database   = 'w.db'     , ## database to store/update reweighting results
    ... delta      = 0.01       , ## stopping criterion for `mean`    weight variation
    ... minmax     = 0.03       , ## stopping criterion for `min/max` weight variation
    ... power      = None       , ## effective power to apply to the weights
    ... debug      = True       , ## store debug information in database
    ... make_plots = True       , ## produce comparison plots
    ... tag        = 'RW'       ) ## tag for better printout

    If `make_plots = False`, it returns the tuple of active reweightings:
    >>> active            = makeWeights ( ... , make_plots = False , ... )

    Otherwise it also returns the tuple of comparison plots:
    >>> active, cmp_plots = makeWeights ( ... , make_plots = True  , ... )
    >>> for item in cmp_plots :
    ...    what    = item.what
    ...    hdata   = item.data
    ...    hmc     = item.mc
    ...    hweight = item.weight

    If no more reweighting iterations are required, `active` is an empty tuple.

    Parameters:
    - dataset    : object handed to the projector of every plot
    - plots      : sized iterable of objects with the attributes `what`, `how`,
                   `address`, `data`, `mc_histo`, `w`, `projector` and `ignore`
    - database   : passed to `DBASE.open`; a false value disables storing
    - compare    : optional callable invoked as `compare(data, mc, address)`
                   with the original data object and the projected MC object
    - delta      : a weight passes the first check if rms/mean <= delta
    - minmax     : a weight passes the second check if |max - min| <= minmax
    - power      : callable taking the number of active plots, or a number
                   in (0, 1.5]; anything else selects a built-in default
    - debug      : also store the intermediate objects under
                   `<address>:REWEIGHTING`
    - make_plots : collect `ComparisonPlot` items and return them
    - tag        : prefix used in log messages and as the table title

    Raises AssertionError if `delta` or `minmax` is not positive.
    """

    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    from ostap.logger.colorized import allright, attention

    nplots = len(plots)

    ## list of plots to compare
    cmp_plots = []
    ## reweighting summary table
    header = ('Reweighting', 'wmin/wmax', 'OK?', 'wrms[%]', 'OK?', 'chi2/ndf',
              'ww', 'exp')

    rows = {}  ## address -> row of the summary table
    save_to_db = []  ## entries for the active plots
    for wplot in plots:

        what = wplot.what  ## variable/function to plot/compare
        how = wplot.how  ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data  ## original "DATA" object
        hmc0 = wplot.mc_histo  ## original "MC"   histogram
        ww = wplot.w  ## relative weight
        projector = wplot.projector  ## projector for MC data
        ignore = wplot.ignore  ## ignore for weight building?

        # =====================================================================
        ## normalize the data (only histograms are converted to densities)
        # =====================================================================
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1): hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        hmc0 = projector(dataset, hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("%s: statistic goes to zero %s/``%s''" %
                           (tag, st, address))
        elif mnmx[0] <= 0:
            logger.warning("%s: statistic is negative  %s/``%s''" %
                           (tag, st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate  the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible:
        #  when MC has at least as many bins as data in every dimension,
        #  the result is built starting from the MC histogram
        hboth = isinstance(hmc, ROOT.TH1) and isinstance(hdata, ROOT.TH1)

        if   hboth and 1 == hmc.dim () and 1 == hdata.dim () and \
               len ( hmc ) >= len( hdata ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 2 == hmc.dim () and 2 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 3 == hmc.dim () and 3 == hdata.dim () and \
                 ( hmc.binsx() >= hdata.binsx() ) and \
                 ( hmc.binsy() >= hdata.binsy() ) and \
                 ( hmc.binsz() >= hdata.binsz() ) :
            w = (1.0 / hmc) * hdata  ## NB!
        else:
            w = hdata / hmc  ## NB!

        # =====================================================================
        ## scale to unit mean & get the statistics of weights
        # =====================================================================
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta  ## small relative spread?
        good2 = abs(mxw - mnw) <= minmax  ## small min/max difference?
        good = good1 and good2

        ## chi2 of the weights with respect to unity, per degree of freedom;
        #  a one-bin weight has no degrees of freedom: do not divide by zero
        c2ndf = sum(w[i].chi2(1.0) for i in w)
        c2ndf /= max(len(w) - 1, 1)

        ## build the row in the summary table
        #  (the effective exponent is appended later, for active plots only)
        rows[address] = (
            address,
            '%-5.3f/%5.3f' % (mnw, mxw),
            allright('+') if good2 else attention('-'),
            (wvar * 100).toString('%6.2f+-%-6.2f'),
            allright('+') if good1 else attention('-'),
            '%6.2f' % c2ndf,
            ('%4.3f' % ww) if 1 != ww else '',
        )

        ## make plots at the start of  each iteration?
        if make_plots:
            cmp_plots.append(ComparisonPlot(what, hdata, hmc, w))

        #
        ## make decision based on the variance of weights
        #
        if (not good) and (not ignore):
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))

        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare: compare(hdata0, hmc0, address)

    active = tuple([p[0] for p in save_to_db])
    nactive = len(active)

    ## common exponent, shared by all active plots
    if power and callable(power):
        eff_exp = power(nactive)
    elif isinstance(power, num_types) and 0 < power <= 1.5:
        eff_exp = 1.0 * power
    elif 1 == nactive and 1 < nplots:
        eff_exp = 0.95
    elif 1 == nactive:
        eff_exp = 1.00
    else:
        eff_exp = 1.10 / max(nactive, 1)

    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        ## avoid too large or too small  weights
        for i in weight:
            wi = weight[i]
            if wi.value() < 0.5:
                weight[i] = VE(0.5, wi.cov2())
            elif wi.value() > 2.0:
                weight[i] = VE(2.0, wi.cov2())

        ## the relative weight of a plot modifies the exponent of this plot
        #  only: the common ``eff_exp'' must stay intact for the other entries
        exponent = eff_exp
        if 1 < nactive and 1 != ww:
            exponent = eff_exp * ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, exponent, address))

        if 1 != exponent and 0 < exponent:
            weight = weight**exponent
            rows[address] = tuple(rows[address]) + ('%4.3f' % exponent, )

        with DBASE.open(database) as db:

            ## every iteration appends one more weight to the stored list
            db[address] = db.get(address, []) + [weight]

            if debug:
                ## entry[2:] holds the clamped weight, before exponentiation
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        del hd0, hm0, hd, hm, weight, entry

    table = [header] + [rows[address] for address in rows]

    import ostap.logger.table as Table
    logger.info(
        '%s, active:#%d \n%s ' %
        (tag, nactive,
         Table.table(table, title=tag, prefix='# ', alignment='lccccccc')))

    cmp_plots = tuple(cmp_plots)
    return (active, cmp_plots) if make_plots else active
Example #5
0
def test_compare_exponentials () :
    """Compare the h?e objects pairwise.

    Every case is logged and then passed to `compare`; the pairs with h4e
    as second object (except (1)-(5)-style pairs) are additionally compared
    with a third positional argument `True` ("with rescale" in the log).
    """
    cases = (
        ( 'Compare exponentials (1) and (2)'                       , ( h1e , h2e        ) ) ,
        ( 'Compare exponentials (1) and (3)'                       , ( h1e , h3e        ) ) ,
        ( 'Compare exponentials (1) and (4)'                       , ( h1e , h4e        ) ) ,
        ( 'Compare exponentials (1) and (4) with rescale'          , ( h1e , h4e , True ) ) ,
        ( 'Compare exponentials (1) and (5)'                       , ( h1e , h5e        ) ) ,
        ( 'Compare exponentials (2) and (3) : should be the same!' , ( h2e , h3e        ) ) ,
        ( 'Compare exponentials (2) and (4)'                       , ( h2e , h4e        ) ) ,
        ( 'Compare exponentials (2) and (4) with rescale'          , ( h2e , h4e , True ) ) ,
        ( 'Compare exponentials (2) and (5)'                       , ( h2e , h5e        ) ) ,
        ( 'Compare exponentials (3) and (4)'                       , ( h3e , h4e        ) ) ,
        ( 'Compare exponentials (3) and (4) with rescale'          , ( h3e , h4e , True ) ) ,
        ( 'Compare exponentials (3) and (5)'                       , ( h3e , h5e        ) ) ,
        ( 'Compare exponentials (4) and (5)'                       , ( h4e , h5e        ) ) ,
    )
    for message , arguments in cases :
        logger.info ( message )
        compare ( *arguments )
Example #6
0
def test_compare_uniforms () :
    """Compare the h?u objects pairwise.

    Every comparison is announced in the log and then performed with
    `compare`; the "with rescale" cases pass `True` as third positional
    argument.  The object compared always matches the logged label.
    """
    logger.info( 'Compare uniforms     (1) and (2)' )
    compare ( h1u , h2u )
    logger.info( 'Compare uniforms     (1) and (3)' )
    compare ( h1u , h3u )
    logger.info( 'Compare uniforms     (1) and (4)' )
    compare ( h1u , h4u )
    logger.info( 'Compare uniforms     (1) and (4) with rescale' )
    compare ( h1u , h4u , True )
    logger.info( 'Compare uniforms     (1) and (5)' )
    compare ( h1u , h5u )
    logger.info( 'Compare uniforms     (2) and (3) : should be the same!' )
    compare ( h2u , h3u )
    logger.info( 'Compare uniforms     (2) and (4)' )
    compare ( h2u , h4u )
    logger.info( 'Compare uniforms     (2) and (4) with rescale' )
    compare ( h2u , h4u , True )
    logger.info( 'Compare uniforms     (2) and (5)' )
    ## the fifth object, as announced in the log line above
    compare ( h2u , h5u )
    logger.info( 'Compare uniforms     (3) and (4)' )
    compare ( h3u , h4u )
    logger.info( 'Compare uniforms     (3) and (4) with rescale;' )
    compare ( h3u , h4u , True )
    logger.info( 'Compare uniforms     (3) and (5)' )
    compare ( h3u , h5u )
    logger.info( 'Compare uniforms     (4) and (5)' )
    compare ( h4u , h5u )
Example #7
0
def test_compare_gaussians() :
    """Compare the h?g objects pairwise.

    Every case is logged and then passed to `compare`; the "with rescale"
    cases pass `True` as third positional argument.
    """
    cases = (
        ( 'Compare gaussians    (1) and (2)'                       , ( h1g , h2g        ) ) ,
        ( 'Compare gaussians    (1) and (3)'                       , ( h1g , h3g        ) ) ,
        ( 'Compare gaussians    (1) and (4)'                       , ( h1g , h4g        ) ) ,
        ( 'Compare gaussians    (1) and (4) with rescale'          , ( h1g , h4g , True ) ) ,
        ( 'Compare gaussians    (1) and (5)'                       , ( h1g , h5g        ) ) ,
        ( 'Compare gaussians    (2) and (3) : should be the same!' , ( h2g , h3g        ) ) ,
        ( 'Compare gaussians    (2) and (4)'                       , ( h2g , h4g        ) ) ,
        ( 'Compare gaussians    (2) and (4) with rescale'          , ( h2g , h4g , True ) ) ,
        ( 'Compare gaussians    (2) and (5)'                       , ( h2g , h5g        ) ) ,
        ( 'Compare gaussians    (3) and (4)'                       , ( h3g , h4g        ) ) ,
        ( 'Compare gaussians    (3) and (4) with rescale'          , ( h3g , h4g , True ) ) ,
        ( 'Compare gaussians    (3) and (5)'                       , ( h3g , h5g        ) ) ,
        ( 'Compare gaussians    (4) and (5)'                       , ( h4g , h5g        ) ) ,
    )
    for message , arguments in cases :
        logger.info ( message )
        compare ( *arguments )
Example #8
0
def test_compare_exponentials():
    """Compare the h?e objects pairwise, passing a title to `compare`.

    The "with rescale" cases additionally pass `density=True`; all other
    cases rely on the default of `compare`.
    """
    plain = {}
    rescaled = {'density': True}
    cases = (
        (h1e, h2e, 'Compare exponentials (1) and (2)', plain),
        (h1e, h3e, 'Compare exponentials (1) and (3)', plain),
        (h1e, h4e, 'Compare exponentials (1) and (4)', plain),
        (h1e, h4e, 'Compare exponentials (1) and (4) with rescale', rescaled),
        (h1e, h5e, 'Compare exponentials (1) and (5)', plain),
        (h2e, h3e, 'Compare exponentials (2) and (3) : should be the same!',
         plain),
        (h2e, h4e, 'Compare exponentials (2) and (4)', plain),
        (h2e, h4e, 'Compare exponentials (2) and (4) with rescale', rescaled),
        (h2e, h5e, 'Compare exponentials (2) and (5)', plain),
        (h3e, h4e, 'Compare exponentials (3) and (4)', plain),
        (h3e, h4e, 'Compare exponentials (3) and (4) with rescale', rescaled),
        (h3e, h5e, 'Compare exponentials (3) and (5)', plain),
        (h4e, h5e, 'Compare exponentials (4) and (5)', plain),
    )
    for first, second, title, options in cases:
        compare(first, second, title, **options)
Example #9
0
def test_compare_uniforms():
    """Compare the h?u objects pairwise, passing a title to `compare`.

    The "with rescale" cases additionally pass `density=True`.  The objects
    compared always match the pair named in the title.
    """
    compare(h1u, h2u, 'Compare uniforms     (1) and (2)')
    compare(h1u, h3u, 'Compare uniforms     (1) and (3)')
    compare(h1u, h4u, 'Compare uniforms     (1) and (4)')
    compare(h1u,
            h4u,
            'Compare uniforms     (1) and (4) with rescale',
            density=True)
    compare(h1u, h5u, 'Compare uniforms     (1) and (5)')
    compare(h2u, h3u, 'Compare uniforms     (2) and (3) : should be the same!')
    compare(h2u, h4u, 'Compare uniforms     (2) and (4)')
    compare(h2u,
            h4u,
            'Compare uniforms     (2) and (4) with rescale',
            density=True)
    # the fifth object, as stated in the title
    compare(h2u, h5u, 'Compare uniforms     (2) and (5)')
    compare(h3u, h4u, 'Compare uniforms     (3) and (4)')
    compare(h3u,
            h4u,
            'Compare uniforms     (3) and (4) with rescale;',
            density=True)
    compare(h3u, h5u, 'Compare uniforms     (3) and (5)')
    compare(h4u, h5u, 'Compare uniforms     (4) and (5)')
Example #10
0
def test_compare_gaussians():
    """Compare the h?g objects pairwise, passing a title to `compare`.

    The "with rescale" cases additionally pass `density=True`; all other
    cases rely on the default of `compare`.
    """
    plain = {}
    rescaled = {'density': True}
    cases = (
        (h1g, h2g, 'Compare gaussians    (1) and (2)', plain),
        (h1g, h3g, 'Compare gaussians    (1) and (3)', plain),
        (h1g, h4g, 'Compare gaussians    (1) and (4)', plain),
        (h1g, h4g, 'Compare gaussians    (1) and (4) with rescale', rescaled),
        (h1g, h5g, 'Compare gaussians    (1) and (5)', plain),
        (h2g, h3g, 'Compare gaussians    (2) and (3) : should be the same!',
         plain),
        (h2g, h4g, 'Compare gaussians    (2) and (4)', plain),
        (h2g, h4g, 'Compare gaussians    (2) and (4) with rescale', rescaled),
        (h2g, h5g, 'Compare gaussians    (2) and (5)', plain),
        (h3g, h4g, 'Compare gaussians    (3) and (4)', plain),
        (h3g, h4g, 'Compare gaussians    (3) and (4) with rescale', rescaled),
        (h3g, h5g, 'Compare gaussians    (3) and (5)', plain),
        (h4g, h5g, 'Compare gaussians    (4) and (5)', plain),
    )
    for first, second, title, options in cases:
        compare(first, second, title, **options)