def test_summary2():
    """Draw a summary plot mixing upper limits and measurements."""
    # drawing options shared by every entry on the plot
    common = dict(label_position=5.6, markersize=1.2, text_size=0.07)
    # the individual limits/measurements, in display order
    entries = [
        Limit(2.1, label='Belle', **common),
        Record(VE(3.4, 1.4 ** 2), label='BaBar', **common),
        Record(VE(2.46, 0.64 ** 2), 0.29, label='LHCb', color=2, **common),
        Record(2.18, (0.81, -0.77), label="Global fit", **common),
        Limit(0.59, label='BESIII', **common),
    ]
    # tune the global ROOT style, if available
    style = ROOT.gStyle
    if style:
        style.SetEndErrorSize(5)
        style.SetTickLength(0.008)
    result = draw_summary(entries, vmin=0, vmax=8)
    # redraw axes on top of the plot contents
    pad = ROOT.gPad
    if pad:
        pad.RedrawAxis()
    time.sleep(3)
def test_summary3():
    """Draw a summary plot from a long list of points, intervals and limits."""
    # drawing options shared by every entry on the plot
    common = dict(label_position=5.6, markersize=1.2, text_size=0.07)

    def _pnt(value):
        # shorthand: a single point with the shared drawing options
        return Point(value, **common)

    def _rng(low, high):
        # shorthand: an interval with the shared drawing options
        return Interval(low, high, **common)

    def _lim():
        # shorthand: the arrow-style limit used twice below
        return Limit(0, -40, arrow_size=0.01, arrow_style='|-|>', **common)

    # the entries, in display order
    entries = [
        _pnt(0), _pnt(+19), _rng(-1, +19), _lim(),
        _pnt(+35), _pnt(+52), _rng(-3, -1), _pnt(-15),
        _pnt(+91), _pnt(+60), _pnt(-79), _pnt(+88),
        _pnt(-215), _rng(-70, +124), _pnt(+102),
        Record(VE(7, 12 ** 2), **common), _lim(),
        _pnt(+100), Record(VE(25, 90 ** 2), **common),
        _pnt(-150), _pnt(0), _pnt(-23), _pnt(+98), _pnt(-149),
        Record(-3, (+4, -15), **common),
        _pnt(+53), _pnt(+166), _pnt(+260), _pnt(-182),
        _rng(-250, +2), _pnt(+13), _pnt(+164),
    ]
    # tune the global ROOT style, if available
    style = ROOT.gStyle
    if style:
        style.SetEndErrorSize(5)
        style.SetTickLength(0.008)
    result = draw_summary(entries, vmin=-300, vmax=300)
    # redraw axes on top of the plot contents
    pad = ROOT.gPad
    if pad:
        pad.RedrawAxis()
    time.sleep(3)
def test_summary1():
    """Draw the mass-measurement summary plot with a combined average band."""
    # drawing options shared by every entry on the plot
    common = dict(label_position=3861, markersize=1.2, text_size=0.05)
    # the combined value: listed as an entry and drawn as the average band
    ave = Average(VE(3871.64, 0.060 ** 2), label='New average', **common)
    # the individual measurements, in display order
    entries = [
        Record(3871.8, 3.1, 3.0, label='D0', **common),
        Record(3873.0, (+1.8, -1.6), 1.3, label='BaBar', **common),
        Record(3868.7, 1.5, 0.4, label='BaBar-K^{0}', **common),
        Record(3871.4, 0.6, 0.1, label='BaBar-K^{+}', **common),
        Record(3871.9, 0.7, 0.2, label='BESIII', **common),
        Record(3871.95, 0.48, 0.12, label="LHCb'2010", **common),
        Record(3871.85, 0.27, 0.19, label='Belle', **common),
        Record(3871.61, 0.16, 0.19, label='CDF', **common),
        Record(3871.69, 0.17, label="PDG'2018", **common),
        Record(3871.70, 0.067, 0.068, label="LHCb'2020", color=ROOT.kRed, **common),
        Record(3871.59, 0.060, 0.030, label="LHCb'2020", color=ROOT.kRed, **common),
        ave,
        Record(3871.70, 0.11, label='m_{D^{0}}#plusm_{D^{*0}}', **common),
    ]
    # tune the global ROOT style, if available
    style = ROOT.gStyle
    if style:
        style.SetEndErrorSize(5)
        style.SetTickLength(0.008)
    result = draw_summary(entries, average=ave, vmin=3860, vmax=3877, offset=1.0)
    # redraw axes on top of the plot contents
    pad = ROOT.gPad
    if pad:
        pad.RedrawAxis()
    time.sleep(3)
def __call__ ( self , s ) :
    """Calculate the weigth for the given ``event'' (==record in TTree/TChain or RooDataSet):
    >>> weight = Weight ( ... )
    >>> tree   = ...
    >>> w      = weight ( tree )
    """
    ## running product of all weighting factors for this event
    total = VE(1, 0)
    ## loop over the registered weighting variables
    for entry in self.__vars:
        accessor  = entry[1]   ## accessor function
        functions = entry[2]   ## the weighting functions
        counter   = entry[3]   ## per-variable statistics
        ## get the weight arguments for the given event
        value = accessor(s)
        args  = value if isinstance(value, tuple) else (value,)
        factor = VE(1.0)
        for fun in functions:
            factor *= fun(*args)  ## update this variable's weight factor
        ## keep the statistics
        counter += factor.value()
        ## update the global weight
        total *= factor
    vw = total.value()
    self.__counter += vw
    if iszero(vw):
        self.__nzeroes += 1
    return vw
def __init__ ( self ,
               dbase   = "weights.db" , ## the name of data base with the weights
               factors = []           ) :
    """Build the weighting object from a database of weighting factors.
    - dbase   : name of the database file with the weighting functions
    - factors : descriptors with ``accessor``/``address``/``merge``/``skip``
    NOTE(review): the mutable default ``factors=[]`` is only read here, never
    mutated, so it is benign.
    """
    #
    ## make some statistic
    #
    self.__counter = SE ()   ## statistics of the global per-event weights
    self.__nzeroes = 0       ## how many events received exactly-zero weight
    self.__vars    = []      ## (name, accessor, functions, counter) entries
    if not factors : return  ## nothing to configure
    ## open database
    with DBASE.open ( dbase , 'r' ) as db : ## READONLY
        ## dump a short summary of the database content (debug only)
        for k in db :
            e = db[k]
            if hasattr ( e , '__len__' ) :
                logger.debug( "DBASE ``%.15s'' key ``%.15s'' #%d" % ( dbase , k, len( e ) ) )
        ## loop over the weighting factors and build the function
        for wvar in factors :
            funval  = wvar.accessor ## accessor to the variable
            funname = wvar.address  ## address in database
            merge   = wvar.merge    ## merge sequence of callables?
            skip    = wvar.skip     ## skip some of them?
            ## a plain string accessor becomes an attribute getter
            if isinstance ( funval , str ) :
                ## funval = operator.attrgetter( funval )
                funval = AttrGetter( funval )
            ##
            functions = db.get ( funname , [] ) ## db[ funname ]
            if not functions :
                logger.warning("No reweighting is available for ``%s'', skip it" % funname )
                continue
            if not isinstance ( functions , ( list , tuple ) ) :
                functions = [ functions ]
            flen = len(functions)
            ## positive ``skip'': keep only the first ``skip`` iterations
            if   0 < skip and skip < flen :
                logger.info  ("Use only %d first iterations for ``%s''" % ( skip , funname ) )
                functions = functions[:skip]
            ## negative ``skip'': drop the last |skip| iterations
            elif 0 > skip and abs(skip) < flen :
                logger.info  ("Skip last %d iterations for ``%s''" % ( skip , funname ) )
                functions = functions[:-1*skip]
            elif 0 == skip :
                pass
            ## out-of-range ``skip'': complain but keep all iterations
            else :
                logger.error("Invalid ``skip'' parameter %s/%d for ``%s''" % ( skip , flen , funname ) )
            ## nullify the uncertainties except for the last histogram
            ## NOTE(review): the clone-and-zero below is applied to the LAST
            ## histogram (first one met in reversed order), which appears to
            ## contradict this inherited comment -- confirm the intent
            _functions = []
            _first     = True
            for f in reversed ( functions ) :
                if isinstance ( f , ROOT.TH1 ) and _first :
                    ## clone it and replace each bin by a zero-error value
                    ff = f.clone()
                    for i in ff :
                        v     = float ( ff[i] )
                        ff[i] = VE(v,0)
                    _functions.append ( ff )
                    _first = False
                else :
                    _functions.append ( f )
            _functions.reverse()
            functions = _functions
            ## merge list of functions into single function
            if merge and 1 < len ( functions ) :
                ## single_func = functions[0] * functions [1]
                single_func = MULT ( functions[0] , functions[1] )
                for fun in functions[2:] :
                    ## multiply it
                    ## single_func *= fun
                    single_func = MULT ( single_func , fun )
                functions = [ single_func ]
            ## register this weighting variable with a fresh statistics counter
            self.__vars += [ ( funname , funval , functions , SE() ) ]
    self.__vars = tuple ( self.__vars )
def makeWeights( dataset,
                 plots=[],              ## reweighting plot descriptors (NOTE: mutable default is only read)
                 database="weights.db", ## database to store/update reweighting results
                 compare=None,          ## comparison function
                 delta=0.01,            ## delta for ``mean'' weight variation
                 minmax=0.03,           ## delta for ``minmax'' weight variation
                 power=None,            ## auto-determination
                 debug=True,            ## save intermediate information in DB
                 make_plots=False,      ## make plots
                 tag="Reweighting"):
    """The main function: perform one re-weighting iteration
    and reweight ``MC''-data set to looks as ``data''(reference) dataset
    >>> results = makeWeights (
    ...  dataset           , ## data source to be reweighted (DataSet, TTree, abstract source)
    ...  plots             , ## reweighting plots
    ...  database          , ## datadabse to store/update reweigting results
    ...  delta             , ## stopping criteria for `mean` weight variation
    ...  minmax            , ## stopping criteria for `min/max` weight variation
    ...  power             , ## effective power to apply to the weigths
    ...  debug      = True , ## store debuig information in database
    ...  make_plots = True , ## produce useful comparison plots
    ...  tag        = 'RW' ) ## tag for better printout
    If `make_plots = False`, it returns the tuple of active reweitings:
    >>> active = makeWeights ( ... , make_plots = False , ... )
    Otherwise it also returns list of comparison plots
    >>> active, cmp_plots = makeWeights ( ... , make_plots = True , ... )
    >>> for item in cmp_plots :
    ...    what    = item.what
    ...    hdata   = item.data
    ...    hmc     = item.mc
    ...    hweight = item.weight
    If no more rewighting iteratios required, <code>active</code> is an empty tuple
    """
    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta'' %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    ## NOTE(review): ``infostr'' and ``isatty'' are imported but not used below
    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    nplots = len(plots)
    ## if 1 < nplots :
    ##     import math
    ##     fudge_factor = math.sqrt ( 1.0 / max ( 2.0 , nplots - 1.0 ) )
    ##     delta  = delta  * fudge_factor
    ##     minmax = minmax * fudge_factor

    ## list of plots to compare
    cmp_plots = []
    ## reweighting summary table
    header = ('Reweighting', 'wmin/wmax', 'OK?', 'wrms[%]', 'OK?', 'chi2/ndf',
              'ww', 'exp')
    rows = {}           ## summary-table rows, keyed by database address
    save_to_db = []     ## entries whose weights still vary too much
    ## number of active plots for reweighting
    for wplot in plots:
        what = wplot.what            ## variable/function to plot/compare
        how = wplot.how              ## weight and/or additional cuts
        address = wplot.address      ## address in database
        hdata0 = wplot.data          ## original "DATA" object
        hmc0 = wplot.mc_histo        ## original "MC" histogram
        ww = wplot.w                 ## relative weight
        projector = wplot.projector  ## projector for MC data
        ignore = wplot.ignore        ## ignore for weigtht building?
        #
        # normalize the data
        #
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1):
            hdata = hdata.density()
        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        hmc0 = projector(dataset, hmc0, what, how)
        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("%s: statistic goes to zero %s/``%s''" %
                           (tag, st, address))
        elif mnmx[0] <= 0:
            logger.warning("%s: statistic is negative %s/``%s''" %
                           (tag, st, address))
        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()
        # =====================================================================
        ## calculate the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================
        #  try to exploit finer binning if/when possible:
        #  when both sides are histograms and MC binning is at least as fine
        #  as the data binning, divide data by MC bin-by-bin via (1/hmc)*hdata
        hboth = isinstance(hmc, ROOT.TH1) and isinstance(hdata, ROOT.TH1)
        if hboth and 1 == hmc.dim() and 1 == hdata.dim() and \
               len(hmc) >= len(hdata):
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 2 == hmc.dim() and 2 == hdata.dim() and \
                 (hmc.binsx() >= hdata.binsx()) and \
                 (hmc.binsy() >= hdata.binsy()):
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 3 == hmc.dim() and 3 == hdata.dim() and \
                 (hmc.binsx() >= hdata.binsx()) and \
                 (hmc.binsy() >= hdata.binsy()) and \
                 (hmc.binsz() >= hdata.binsz()):
            w = (1.0 / hmc) * hdata  ## NB!
        else:
            w = hdata / hmc  ## NB!
        # =====================================================================
        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta       ## relative rms small enough?
        good2 = abs(mxw - mnw) <= minmax    ## weight spread small enough?
        good = good1 and good2  ## small variance?
        #
        ## chi2/ndf of the weights w.r.t. unity
        ## NOTE(review): divides by len(w)-1 -- a single-bin weight object
        ## would divide by zero; confirm len(w) > 1 always holds here
        c2ndf = 0
        for i in w:
            c2ndf += w[i].chi2(1.0)
        c2ndf /= (len(w) - 1)
        ## build the row in the summary table
        row = address , \
              '%-5.3f/%5.3f' % ( cnt.minmax()[0] , cnt.minmax()[1] ) , \
              allright ( '+' ) if good2 else attention ( '-' ) , \
              (wvar * 100).toString('%6.2f+-%-6.2f') , \
              allright ( '+' ) if good1 else attention ( '-' ) , '%6.2f' % c2ndf
        ## make plots at the start of each iteration?
        if make_plots:
            item = ComparisonPlot(what, hdata, hmc, w)
            cmp_plots.append(item)
        row = tuple(list(row) + ['%4.3f' % ww if 1 != ww else ''])
        rows[address] = row
        #
        ## make decision based on the variance of weights
        #
        mnw, mxw = cnt.minmax()
        if (not good) and (not ignore):  ## small variance?
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))
        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare:
            compare(hdata0, hmc0, address)

    ## addresses that still need reweighting
    active = tuple([p[0] for p in save_to_db])
    nactive = len(active)

    ## choose the ``effective exponent'' applied to the weights
    if power and callable(power):
        eff_exp = power(nactive)
    elif isinstance(power, num_types) and 0 < power <= 1.5:
        eff_exp = 1.0 * power
    elif 1 == nactive and 1 < len(plots):
        eff_exp = 0.95
    elif 1 == nactive:
        eff_exp = 1.00
    else:
        eff_exp = 1.10 / max(nactive, 1)

    ## store the new weights for every still-active address
    while database and save_to_db:
        entry = save_to_db.pop()
        address, ww, hd0, hm0, hd, hm, weight = entry
        cnt = weight.stat()
        mnw, mxw = cnt.minmax()
        ## avoid too large or too small weights: clamp to [0.5, 2.0]
        for i in weight:
            w = weight[i]
            if w.value() < 0.5:
                weight[i] = VE(0.5, w.cov2())
            elif w.value() > 2.0:
                weight[i] = VE(2.0, w.cov2())
        ## NOTE(review): ``eff_exp *= ww'' mutates eff_exp cumulatively, so it
        ## carries over to subsequent entries of this loop -- confirm intent
        if 1 < nactive and 1 != ww:
            eff_exp *= ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, eff_exp, address))
        if 1 != eff_exp and 0 < eff_exp:
            weight = weight**eff_exp
        row = list(rows[address])
        row.append('%4.3f' % eff_exp)
        rows[address] = tuple(row)
        ## append the new weight (and optional debug objects) to the database
        with DBASE.open(database) as db:
            db[address] = db.get(address, []) + [weight]
            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])
        del hd0, hm0, hd, hm, weight, entry

    ## print the summary table
    table = [header]
    for row in rows:
        table.append(rows[row])
    import ostap.logger.table as Table
    logger.info(
        '%s, active:#%d \n%s ' %
        (tag, nactive,
         Table.table(table, title=tag, prefix='# ', alignment='lccccccc')))
    cmp_plots = tuple(cmp_plots)
    return (active, cmp_plots) if make_plots else active
def __init__( self,
              dbase="weights.db",  ## the name of data base with the weights
              factors=[]):
    """Build the weighting object from a database of weighting factors,
    collecting a summary table of the configuration.
    - dbase   : name of the database file with the weighting functions
    - factors : descriptors with ``accessor``/``address``/``merge``/``skip``
    NOTE(review): the mutable default ``factors=[]`` is only read here, never
    mutated, so it is benign.
    """
    #
    ## make some statistic
    #
    self.__counter = SE()   ## statistics of the global per-event weights
    self.__nzeroes = 0      ## how many events received exactly-zero weight
    self.__vars = []        ## (name, accessor, functions, counter) entries
    if not factors:
        return              ## nothing to configure
    self.__dbase = dbase    ## remember the database name
    ## open database
    self.__table = [('Reweighting', 'accessor', '#', 'merged?', 'skip')]
    ## NOTE(review): ``rows'' appears unused -- rows are appended to
    ## ``self.__table`` instead; confirm it can be removed
    rows = []
    with DBASE.open(dbase, 'r') as db:  ## READONLY
        logger.debug('Reweigting database: \n%s' % db.table(prefix='# '))
        ## loop over the weighting factors and build the function
        for wvar in factors:
            funval = wvar.accessor  ## accessor to the variable
            funname = wvar.address  ## address in database
            merge = wvar.merge      ## merge sequence of callables?
            skip = wvar.skip        ## skip some of them?
            ## start the summary-table row for this factor
            row = []
            row.append(funname)
            ## a plain string accessor becomes an attribute getter
            if isinstance(funval, str):
                row.append(funval)
                ## funval = operator.attrgetter( funval )
                funval = AttrGetter(funval)
            elif isinstance(funval, AttrGetter):
                atts = funval.attributes
                if 1 == len(atts):
                    atts = atts[0]
                row.append(str(atts))
            else:
                row.append('')
            ##
            functions = db.get(funname, [])  ## db[ funname ]
            if not functions:
                logger.warning(
                    "No reweighting is available for ``%s'', skip it" %
                    funname)
                continue
            if not isinstance(functions, (list, tuple)):
                functions = [functions]
            flen = len(functions)
            ## positive ``skip'': keep only the first ``skip`` iterations
            if 0 < skip and skip < flen:
                logger.info("Use only %d first iterations for ``%s''" %
                            (skip, funname))
                functions = functions[:skip]
            ## negative ``skip'': drop the last |skip| iterations
            elif 0 > skip and abs(skip) < flen:
                logger.info("Skip last %d iterations for ``%s''" %
                            (skip, funname))
                functions = functions[:skip]
            elif 0 == skip:
                pass
            ## out-of-range ``skip'': complain but keep all iterations
            else:
                logger.error(
                    "Invalid ``skip'' parameter %s/%d for ``%s''" %
                    (skip, flen, funname))
            row.append('%d' % flen)
            ## nullify the uncertainties except for the last histogram
            ## NOTE(review): the clone-and-zero below is applied to the LAST
            ## histogram (first one met in reversed order), which appears to
            ## contradict this inherited comment -- confirm the intent
            _functions = []
            _first = True
            for f in reversed(functions):
                if isinstance(f, ROOT.TH1) and _first:
                    ## clone it and replace each bin by a zero-error value
                    ff = f.clone()
                    for i in ff:
                        v = float(ff[i])
                        ff[i] = VE(v, 0)
                    _functions.append(ff)
                    _first = False
                else:
                    _functions.append(f)
            _functions.reverse()
            functions = _functions
            row.append('+' if merge else '-')
            row.append('%s' % skip)
            ## merge list of functions into single function
            if merge and 1 < len(functions):
                ## single_func = functions[0] * functions [1]
                single_func = MULT(functions[0], functions[1])
                for fun in functions[2:]:
                    ## multiply it
                    ## single_func *= fun
                    single_func = MULT(single_func, fun)
                functions = [single_func]
            ## register this weighting variable with a fresh statistics counter
            self.__vars += [(funname, funval, functions, SE())]
            self.__table.append(row)
    self.__vars = tuple(self.__vars)
from ostap.utils.cleanup import CleanUp files = [ CleanUp.tempfile(prefix='ostap-test-trees-addbranch-%d-' % i, suffix='.root') for i in range(nfiles) ] for f in progress_bar(files): create_tree(f, nentries) return files # ============================================================================= ## let h2 be a weigth histogram h2 = ROOT.TH2D('h2', '', 20, 0, 10, 15, 2, 5) h2 += lambda x, y: VE(10 + x + y, (0.25 * (10 + x + y))**2) cut = ROOT.TCut('abs(mass-3.1)<1*0.015') # ============================================================================= ## Add pseudoexepriments into TTree/TChain def test_modify_initial_tree(NEXP=10): """Add pseudoexepriments into TTree/TChain """ files = prepare_data(1, 100000) logger.info('Add %s pseudoexepriments into TTree/TChain' % NEXP) logger.info('#files: %s' % len(files))
import ostap.utils.cleanup as CU import ostap.io.zipshelve as zipshelve import ostap.io.bz2shelve as bz2shelve if 2 < python_version.major : import ostap.io.lzshelve as lzshelve else : lzshelve = None import ostap.io.sqliteshelve as sqliteshelve import ostap.io.rootshelve as rootshelve # ============================================================================= bins = 1000 data = {} h1 = ROOT.TH1D('h1','1D-histogram',bins,-5,5) ; h1.Sumw2() m1 = VE(1,2) for i in range ( 0, 100000) : h1.Fill( m1.gauss() ) bins = 50 h2 = ROOT.TH2D('h2','2D-histogram',bins,-5,5,bins,-5,5) ; h2.Sumw2() for i in range ( 0, 100000) : h2.Fill( m1.gauss() , m1.gauss() ) data [ 'histo-1D' ] = h1 data [ 'histo-2D' ] = h2 data [ 'both' ] = (123 , h1 , {'a':2}, h2,'comment',()) data [ 'histos' ] = {} for i in range ( 5000 ) : ht = 'histo#%d' % i hh = ROOT.TH1D ( ht , '' , 500 , 0 , 100 ) for j in range ( 200 ) : hh.Fill ( random.gauss ( 50 , 10) )
import ostap.io.zipshelve as zipshelve import ostap.io.sqliteshelve as sqliteshelve import ostap.io.rootshelve as rootshelve # ============================================================================= db_sql_name = tempfile.mktemp(suffix='.msql') db_zip_name = tempfile.mktemp(suffix='.zdb') db_root_name = tempfile.mktemp(suffix='.root') bins = 1000 data = {} h1 = ROOT.TH1D('h1', '1D-histogram', bins, -5, 5) h1.Sumw2() m1 = VE(1, 2) for i in range(0, 100000): h1.Fill(m1.gauss()) bins = 50 h2 = ROOT.TH2D('h2', '2D-histogram', bins, -5, 5, bins, -5, 5) h2.Sumw2() for i in range(0, 100000): h2.Fill(m1.gauss(), m1.gauss()) data['histo-1D'] = h1 data['histo-2D'] = h2 data['both'] = (123, h1, {'a': 2}, h2, 'comment') def test_shelves():