def graphs(self):
    """Get the dictionary of graphs"""
    grphs = {}
    import ostap.histos.graphs
    with DBASE.open(self.dbase, 'r') as db:  ## READONLY
        for i in self.__vars:
            address = i[0]
            functions = db.get(address, ())
            if not functions:
                continue
            graph = ROOT.TGraphAsymmErrors(len(functions))
            for n, w in enumerate(functions):
                if not hasattr(w, 'stat'):
                    continue
                cnt = w.stat()
                wmn, wmx = cnt.minmax()
                graph[n] = n, 0, 0, 1, 1 - wmn, wmx - 1
            grphs[address] = graph
    return grphs
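# =============================================================================
## Illustrative usage sketch (not part of the original code): draw the graphs
#  returned by `graphs()`. The helper name `draw_weight_graphs`, the `weighter`
#  argument and the output file naming are assumptions; only `graphs()`, its
#  dict return value and ROOT.TGraphAsymmErrors come from the method above.
import ROOT

def draw_weight_graphs(weighter, prefix='weights_'):
    """Draw every graph returned by ``weighter.graphs()``, one canvas per DB address"""
    canvases = []
    for address, graph in weighter.graphs().items():
        c = ROOT.TCanvas('c_%s' % address, address)
        graph.Draw('AP')  ## standard ROOT option: axes + points
        c.Print(prefix + address.replace('/', '_') + '.png')
        canvases.append(c)  ## keep the canvases alive
    return canvases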
def runPidCalib(the_func, particle, stripping, polarity, trackcuts, **config):
    """The basic function:
    - oversimplified version of MakePerfHistsRunRange.py script from Urania/PIDCalib
    """
    #
    ## perform some arguments check
    #
    ## 1) check the stripping version
    from PIDPerfScripts.DataFuncs import CheckStripVer
    CheckStripVer(stripping)
    ## 2) set the magnet polarity  [not-needed, since embedded into parser]
    from PIDPerfScripts.DataFuncs import CheckMagPol
    CheckMagPol(polarity)
    ## 3) set the particle name    [not-needed, since embedded into parser]
    from PIDPerfScripts.DataFuncs import CheckPartType
    CheckPartType(particle)

    runMin = config.get('RunMin', 0)
    runMax = config.get('RunMax', -1)
    verbose = config.get('Verbose', True)
    maxFiles = config.get('MaxFiles', -1)  ## a bit strange treatment of runMax in PIDCalib :-(

    #
    ## finally call the standard PIDCalib machinery with user-specified function
    #
    histos = makePlots(the_func, particle, stripping, polarity, trackcuts,
                       runMin=runMin, runMax=runMax,
                       verbose=verbose, maxFiles=maxFiles,
                       parallel=config.get('Parallel', False))

    if config.get('dbname', None):
        try:
            import ostap.io.zipshelve as DBASE
            with DBASE.open(config['dbname']) as db:
                if verbose:
                    logger.info('Save data into %s' % config['dbname'])
                ##
                key = 'PIDCalib(%s)@Stripping%s/%s' % (particle, stripping, polarity)
                db[key] = histos
                db[key + 'Cuts'] = trackcuts
                if verbose:
                    db.ls()
        except:
            logger.error('Unable to save data in DB')

    return histos
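# =============================================================================
## Hypothetical invocation sketch (not part of the original code): `my_fill_func`
#  stands for the user-supplied histogramming function forwarded to makePlots and
#  is not defined here; the particle/stripping/polarity/cut values are examples only.
histos = runPidCalib(
    my_fill_func,           ## user-defined function to fill the histograms
    'K',                    ## particle type     (checked via CheckPartType)
    '21',                   ## stripping version (checked via CheckStripVer)
    'MagUp',                ## magnet polarity   (checked via CheckMagPol)
    'Probe_hasRich==1',     ## track cuts
    RunMin=0,
    RunMax=-1,
    MaxFiles=-1,
    Verbose=True,
    Parallel=False,
    dbname='pidcalib.db')   ## optional: persist the histograms via ostap.io.zipshelve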
def __init__ ( self                   ,
               dbase   = "weights.db" , ## the name of data base with the weights
               factors = []           ) :

    #
    ## make some statistic
    #
    self._counter = SE ()
    self._nzeroes = 0

    self.vars = []
    if not factors : return

    ## open database
    with DBASE.open ( dbase , 'r' ) as db : ## READONLY

        for k in db :
            e = db[k]
            if hasattr ( e , '__len__' ) :
                logger.debug ( 'DBASE "%.15s" key "%.15s" #%d' % ( dbase , k , len ( e ) ) )

        ## loop over the weighting factors and build the function
        for f in factors :

            funval  = f[0] ## accessor to the variable
            funname = f[1] ## address in database

            if isinstance ( funval , str ) :
                varnam = funval
                funval = lambda s , varnam = varnam : getattr ( s , varnam ) ## NB: bind the name now, not at call time

            ##
            functions = db.get ( funname , [] ) ## db[ funname ]
            if not functions :
                logger.warning ( 'No reweighting is available for %s, skip it' % funname )

            merge = True
            if 2 < len ( f ) : merge = f[2]

            if not isinstance ( functions , ( list , tuple ) ) :
                functions = [ functions ]

            ## merge list of functions into single function
            if merge and 1 < len ( functions ) :
                single_func = functions[0] * functions[1]
                for fun in functions[2:] :
                    single_func *= fun
                functions = [ single_func ]

            self.vars += [ ( funname , funval , functions , SE() ) ]
## ix , iy = 60 , 40
hmc = h2_axes([20.0 / ix * i for i in range(ix + 1)],
              [15.0 / iy * i for i in range(iy + 1)])
ix, iy = 60, 36
hmcx = h1_axis([20.0 / ix * i for i in range(ix + 1)])
hmcy = h1_axis([15.0 / iy * i for i in range(iy + 1)])

## prepare re-weighting machinery
maxIter = 25

## check database
import os
if not os.path.exists(dbname):
    logger.info('Create new weights DBASE')
    db = DBASE.open(dbname, 'c')  ## create new empty db
    db.close()
else:
    logger.info('Existing weights DBASE will be used')

# =============================================================================
## make reweighting iterations
from ostap.tools.reweight import Weight, makeWeights, WeightingPlot, W2Data
from ostap.fitting.selectors import SelectorWithVars, Variable
import ostap.parallel.parallel_fill
# =============================================================================
## configuration of reweighting
weightings = (
    ## variable          address in DB
def __init__ ( self                   ,
               dbase   = "weights.db" , ## the name of data base with the weights
               factors = []           ) :

    #
    ## make some statistic
    #
    self.__counter = SE ()
    self.__nzeroes = 0

    self.__vars = []
    if not factors : return

    ## open database
    with DBASE.open ( dbase , 'r' ) as db : ## READONLY

        for k in db :
            e = db[k]
            if hasattr ( e , '__len__' ) :
                logger.debug ( "DBASE ``%.15s'' key ``%.15s'' #%d" % ( dbase , k , len ( e ) ) )

        ## loop over the weighting factors and build the function
        for wvar in factors :

            funval  = wvar.accessor ## accessor to the variable
            funname = wvar.address  ## address in database
            merge   = wvar.merge    ## merge sequence of callables?
            skip    = wvar.skip     ## skip some of them?

            if isinstance ( funval , str ) :
                ## funval = operator.attrgetter( funval )
                funval = AttrGetter ( funval )

            ##
            functions = db.get ( funname , [] ) ## db[ funname ]
            if not functions :
                logger.warning ( "No reweighting is available for ``%s'', skip it" % funname )
                continue

            if not isinstance ( functions , ( list , tuple ) ) :
                functions = [ functions ]

            flen = len ( functions )
            if   0 < skip and skip < flen :
                logger.info  ( "Use only %d first iterations for ``%s''" % ( skip , funname ) )
                functions = functions [ : skip ]
            elif 0 > skip and abs ( skip ) < flen :
                logger.info  ( "Skip last %d iterations for ``%s''" % ( skip , funname ) )
                functions = functions [ : skip ] ## drop the last |skip| entries
            elif 0 == skip :
                pass
            else :
                logger.error ( "Invalid ``skip'' parameter %s/%d for ``%s''" % ( skip , flen , funname ) )

            ## nullify the uncertainties except for the last histogram
            _functions = []
            _first     = True
            for f in reversed ( functions ) :
                if isinstance ( f , ROOT.TH1 ) and _first :
                    ff = f.clone()
                    for i in ff :
                        v     = float ( ff[i] )
                        ff[i] = VE ( v , 0 )
                    _functions.append ( ff )
                    _first = False
                else :
                    _functions.append ( f )

            _functions.reverse()
            functions = _functions

            ## merge list of functions into single function
            if merge and 1 < len ( functions ) :
                ## single_func = functions[0] * functions [1]
                single_func = MULT ( functions[0] , functions[1] )
                for fun in functions [2:] :
                    ## multiply it
                    ## single_func *= fun
                    single_func = MULT ( single_func , fun )
                functions = [ single_func ]

            self.__vars += [ ( funname , funval , functions , SE() ) ]

    self.__vars = tuple ( self.__vars )
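# =============================================================================
## Illustrative construction sketch (not part of the original code).
#  Each factor must expose the four attributes read above: ``accessor``,
#  ``address``, ``merge`` and ``skip``; the namedtuple stand-in and the
#  database addresses below are assumptions, the real ostap factor type may differ.
from collections import namedtuple
from ostap.tools.reweight import Weight

WVar = namedtuple ( 'WVar' , ( 'accessor' , 'address' , 'merge' , 'skip' ) )

factors = [
    WVar ( accessor = 'pt'           , address = 'MC:pt' , merge = True , skip = 0 ) , ## string accessor -> AttrGetter
    WVar ( accessor = lambda s : s.y , address = 'MC:y'  , merge = True , skip = 0 ) , ## callable accessor
]

weighter = Weight ( dbase = 'weights.db' , factors = factors )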
def makeWeights ( dataset                 ,
                  plots    = []           ,
                  database = "weights.db" ,
                  compare  = None         , ## comparison function
                  delta    = 0.001        , ## delta for ``mean''   weight variation
                  minmax   = 0.05         , ## delta for ``minmax'' weight variation
                  power    = 0            , ## auto-determination
                  debug    = True         ) : ## save intermediate information in DB

    assert 0 < delta  , "Reweighting: Invalid value for ``delta''  %s" % delta
    assert 0 < minmax , "Reweighting: Invalid value for ``minmax'' %s" % minmax

    power = power if power >= 1 else len ( plots )

    nplots = len ( plots )
    if 1 < nplots :
        import math
        fudge_factor = math.sqrt ( 1.0 / max ( 2.0 , nplots - 1.0 ) )
        delta        = delta  * fudge_factor
        minmax       = minmax * fudge_factor

    save_to_db = []
    ## number of active plots for reweighting
    active = 0
    ## loop over plots
    for wplot in plots :

        what    = wplot.what     ## variable/function to plot/compare
        how     = wplot.how      ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0  = wplot.data     ## original "DATA" object
        hmc0    = wplot.mc_histo ## original "MC"   histogram
        ww      = wplot.w        ## relative weight

        #
        ## normalize the data
        #
        hdata = hdata0
        if isinstance ( hdata , ROOT.TH1 ) : hdata = hdata.density ()

        #
        ## make a plot on (MC) data with the weight
        #
        dataset.project ( hmc0 , what , how )

        st   = hmc0.stat()
        mnmx = st.minmax()
        if iszero ( mnmx[0] ) :
            logger.warning ( "Reweighting: statistic goes to zero %s/``%s''" % ( st , address ) )

        #
        ## normalize MC
        #
        hmc = hmc0.density()

        #
        ## calculate the reweighting factor : a bit conservative (?)
        #  this is the only important line
        #
        ## try to exploit finer binning if possible
        if len ( hmc ) >= len ( hdata ) :
            w = ( 1.0 / hmc ) * hdata ## NB!
        else :
            w = hdata / hmc           ## NB!

        ## scale & get the statistics of weights
        w  /= w.stat().mean().value()
        cnt = w.stat()
        #
        wvar = cnt.rms() / cnt.mean()
        logger.info ( 'Reweighting: %24s: mean/(min,max):%20s/(%.3f,%.3f) RMS:%s[%%]' %
                      ( "``" + address + "''"                     ,
                        cnt.mean().toString ( '(%.2f+-%.2f)' )     ,
                        cnt.minmax()[0]                            ,
                        cnt.minmax()[1]                            ,
                        ( wvar * 100 ).toString ( '(%.2f+-%.2f)' ) ) )
        #
        ## make decision based on the variance of weights
        #
        mnw , mxw = cnt.minmax()
        if wvar.value() <= delta and abs ( mxw - mnw ) <= minmax : ## small variance?
            logger.info ( "Reweighting: No more reweights for ``%s'' [%.2f%%]/[(%+.1f,%+.1f)%%]" % \
                          ( address , wvar * 100 , ( mnw - 1 ) * 100 , ( mxw - 1 ) * 100 ) )
            del w , hdata , hmc
        else :
            save_to_db.append ( ( address , ww , hdata0 , hmc0 , hdata , hmc , w ) )

        #
        ## make a comparison (if needed)
        #
        if compare : compare ( hdata0 , hmc0 , address )

    ## for single reweighting
    if 1 == nplots : power = 1

    if power != nplots :
        logger.info ( "Reweighting: ``power'' is %g/#%d" % ( power , nplots ) )

    active = len ( save_to_db )
    if active != nplots :
        logger.info ( "Reweighting: number of ``active'' reweights %s/#%d" % ( active , nplots ) )

    if database and save_to_db :
        power += ( nplots - active )
        logger.info ( "Reweighting: ``power'' is changed to %g" % power )

    while database and save_to_db :

        entry = save_to_db.pop()

        address , ww , hd0 , hm0 , hd , hm , weight = entry

        eff_exp = 1.0 / power
        if 1 != nplots and 1 != ww :
            eff_exp *= ww
            logger.info ( "Reweighting: apply ``effective exponent'' of %.3f for ``%s''" % ( eff_exp , address ) )

        if 1 != eff_exp and 0 < eff_exp :
            weight = weight ** eff_exp
            ## print 'WEIGHT stat', eff_exp, weight.stat()
            ## hmmmm... needed ? yes!
        #if 1 < power : weight = weight ** ( 1.0 / power ) ## relative importance
        #if 1 != ww :
        #    logger.info ("Reweighting: apply ``relative importance factor'' of %.3g for ``%s''" % ( ww , address ) )
        #    weight = weight ** ww

        with DBASE.open ( database ) as db :
            db [ address ] = db.get ( address , [] ) + [ weight ]
            if debug :
                addr        = address + ':REWEIGHTING'
                db [ addr ] = db.get ( addr , [] ) + list ( entry[2:] )

        del hd0 , hm0 , hd , hm , weight , entry

    return active
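# =============================================================================
## Hedged sketch of the outer iteration loop (not part of the original code):
#  ``mc_dataset``, ``hdata_pt``, ``hmc_pt`` and ``dbname`` are placeholders from
#  the caller; the WeightingPlot argument order is assumed to match the
#  attributes read inside makeWeights (what, how, address, data, mc_histo).
from ostap.tools.reweight import WeightingPlot, makeWeights

maxIter = 25
for iteration in range ( maxIter ) :

    plots  = [ WeightingPlot ( 'pt' , 'weight' , 'MC:pt' , hdata_pt , hmc_pt ) ]

    ## one reweighting iteration: returns the number of still-``active'' reweightings
    active = makeWeights ( mc_dataset , plots , dbname , delta = 0.001 , minmax = 0.05 )

    if not active :
        logger.info ( 'No more reweighting iterations are needed' )
        break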
def makeWeights(
        dataset,
        plots=[],
        database="weights.db",
        compare=None,       ## comparison function
        delta=0.01,         ## delta for ``mean'' weight variation
        minmax=0.03,        ## delta for ``minmax'' weight variation
        power=None,         ## auto-determination
        debug=True,         ## save intermediate information in DB
        make_plots=False,   ## make plots
        tag="Reweighting"):
    """The main function: perform one re-weighting iteration
    and reweight the ``MC'' data set to look like the ``data'' (reference) data set
    >>> results = makeWeights (
    ...     dataset           , ## data source to be reweighted (DataSet, TTree, abstract source)
    ...     plots             , ## reweighting plots
    ...     database          , ## database to store/update reweighting results
    ...     delta             , ## stopping criterion for the `mean` weight variation
    ...     minmax            , ## stopping criterion for the `min/max` weight variation
    ...     power             , ## effective power to apply to the weights
    ...     debug      = True , ## store debug information in database
    ...     make_plots = True , ## produce useful comparison plots
    ...     tag        = 'RW' ) ## tag for better printout

    If `make_plots = False`, it returns the tuple of active reweightings:
    >>> active = makeWeights ( ... , make_plots = False , ... )

    Otherwise it also returns the list of comparison plots
    >>> active, cmp_plots = makeWeights ( ... , make_plots = True , ... )
    >>> for item in cmp_plots :
    ...     what    = item.what
    ...     hdata   = item.data
    ...     hmc     = item.mc
    ...     hweight = item.weight

    If no more reweighting iterations are required, <code>active</code> is an empty tuple
    """

    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (tag, minmax)

    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    nplots = len(plots)
    ## if 1 < nplots :
    ##     import math
    ##     fudge_factor = math.sqrt ( 1.0 / max ( 2.0 , nplots - 1.0 ) )
    ##     delta  = delta  * fudge_factor
    ##     minmax = minmax * fudge_factor

    ## list of plots to compare
    cmp_plots = []
    ## reweighting summary table
    header = ('Reweighting', 'wmin/wmax', 'OK?', 'wrms[%]', 'OK?', 'chi2/ndf', 'ww', 'exp')
    rows = {}
    save_to_db = []
    ## number of active plots for reweighting
    for wplot in plots:

        what = wplot.what            ## variable/function to plot/compare
        how = wplot.how              ## weight and/or additional cuts
        address = wplot.address      ## address in database
        hdata0 = wplot.data          ## original "DATA" object
        hmc0 = wplot.mc_histo        ## original "MC"   histogram
        ww = wplot.w                 ## relative weight
        projector = wplot.projector  ## projector for MC data
        ignore = wplot.ignore        ## ignore for weight building?

        #
        # normalize the data
        #
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1):
            hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        hmc0 = projector(dataset, hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("%s: statistic goes to zero %s/``%s''" % (tag, st, address))
        elif mnmx[0] <= 0:
            logger.warning("%s: statistic is negative  %s/``%s''" % (tag, st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible
        hboth = isinstance(hmc, ROOT.TH1) and isinstance(hdata, ROOT.TH1)

        if hboth and 1 == hmc.dim() and 1 == hdata.dim() and \
               len(hmc) >= len(hdata):
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 2 == hmc.dim() and 2 == hdata.dim() and \
                 (hmc.binsx() >= hdata.binsx()) and \
                 (hmc.binsy() >= hdata.binsy()):
            w = (1.0 / hmc) * hdata  ## NB!
        elif hboth and 3 == hmc.dim() and 3 == hdata.dim() and \
                 (hmc.binsx() >= hdata.binsx()) and \
                 (hmc.binsy() >= hdata.binsy()) and \
                 (hmc.binsz() >= hdata.binsz()):
            w = (1.0 / hmc) * hdata  ## NB!
        else:
            w = hdata / hmc          ## NB!

        # =====================================================================
        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?
        #
        c2ndf = 0
        for i in w:
            c2ndf += w[i].chi2(1.0)
        c2ndf /= (len(w) - 1)

        ## build the row in the summary table
        row = address, \
              '%-5.3f/%5.3f' % (cnt.minmax()[0], cnt.minmax()[1]), \
              allright('+') if good2 else attention('-'), \
              (wvar * 100).toString('%6.2f+-%-6.2f'), \
              allright('+') if good1 else attention('-'), \
              '%6.2f' % c2ndf

        ## make plots at the start of each iteration?
        if make_plots:
            item = ComparisonPlot(what, hdata, hmc, w)
            cmp_plots.append(item)

        row = tuple(list(row) + ['%4.3f' % ww if 1 != ww else ''])
        rows[address] = row

        #
        ## make decision based on the variance of weights
        #
        mnw, mxw = cnt.minmax()
        if (not good) and (not ignore):  ## small variance?
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))

        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare:
            compare(hdata0, hmc0, address)

    active = tuple([p[0] for p in save_to_db])
    nactive = len(active)

    if power and callable(power):
        eff_exp = power(nactive)
    elif isinstance(power, num_types) and 0 < power <= 1.5:
        eff_exp = 1.0 * power
    elif 1 == nactive and 1 < len(plots):
        eff_exp = 0.95
    elif 1 == nactive:
        eff_exp = 1.00
    else:
        eff_exp = 1.10 / max(nactive, 1)

    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        cnt = weight.stat()
        mnw, mxw = cnt.minmax()

        ## avoid too large or too small weights
        for i in weight:
            w = weight[i]
            if w.value() < 0.5:
                weight[i] = VE(0.5, w.cov2())
            elif w.value() > 2.0:
                weight[i] = VE(2.0, w.cov2())

        if 1 < nactive and 1 != ww:
            eff_exp *= ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" % (tag, eff_exp, address))

        if 1 != eff_exp and 0 < eff_exp:
            weight = weight**eff_exp

        row = list(rows[address])
        row.append('%4.3f' % eff_exp)
        rows[address] = tuple(row)

        with DBASE.open(database) as db:
            db[address] = db.get(address, []) + [weight]
            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        del hd0, hm0, hd, hm, weight, entry

    table = [header]
    for row in rows:
        table.append(rows[row])

    import ostap.logger.table as Table
    logger.info('%s, active:#%d \n%s ' % (
        tag, nactive,
        Table.table(table, title=tag, prefix='# ', alignment='lccccccc')))

    cmp_plots = tuple(cmp_plots)
    return (active, cmp_plots) if make_plots else active
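# =============================================================================
## Hedged sketch (not part of the original code): the ``power`` argument may be a
#  callable of the number of active reweightings, cf. ``power(nactive)`` above.
#  ``my_power``, its 1.1 scale and the ``mc_dataset``/``plots`` names are assumptions.
def my_power(nactive):
    """Example exponent rule: full weight for a single active reweighting,
    a damped exponent when several reweightings compete"""
    return 1.0 if nactive <= 1 else 1.1 / nactive

active = makeWeights(mc_dataset, plots, 'weights.db',
                     delta=0.01, minmax=0.03,
                     power=my_power, make_plots=False,
                     tag='RW')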
def __init__(
        self,
        dbase="weights.db",  ## the name of data base with the weights
        factors=[]):

    #
    ## make some statistic
    #
    self.__counter = SE()
    self.__nzeroes = 0

    self.__vars = []
    if not factors:
        return

    self.__dbase = dbase

    ## open database
    self.__table = [('Reweighting', 'accessor', '#', 'merged?', 'skip')]
    rows = []
    with DBASE.open(dbase, 'r') as db:  ## READONLY

        logger.debug('Reweighting database: \n%s' % db.table(prefix='# '))

        ## loop over the weighting factors and build the function
        for wvar in factors:

            funval = wvar.accessor   ## accessor to the variable
            funname = wvar.address   ## address in database
            merge = wvar.merge       ## merge sequence of callables?
            skip = wvar.skip         ## skip some of them?

            row = []

            row.append(funname)

            if isinstance(funval, str):
                row.append(funval)
                ## funval = operator.attrgetter( funval )
                funval = AttrGetter(funval)
            elif isinstance(funval, AttrGetter):
                atts = funval.attributes
                if 1 == len(atts):
                    atts = atts[0]
                row.append(str(atts))
            else:
                row.append('')

            ##
            functions = db.get(funname, [])  ## db[ funname ]
            if not functions:
                logger.warning("No reweighting is available for ``%s'', skip it" % funname)
                continue

            if not isinstance(functions, (list, tuple)):
                functions = [functions]

            flen = len(functions)
            if 0 < skip and skip < flen:
                logger.info("Use only %d first iterations for ``%s''" % (skip, funname))
                functions = functions[:skip]
            elif 0 > skip and abs(skip) < flen:
                logger.info("Skip last %d iterations for ``%s''" % (skip, funname))
                functions = functions[:skip]
            elif 0 == skip:
                pass
            else:
                logger.error("Invalid ``skip'' parameter %s/%d for ``%s''" % (skip, flen, funname))

            row.append('%d' % flen)

            ## nullify the uncertainties except for the last histogram
            _functions = []
            _first = True
            for f in reversed(functions):
                if isinstance(f, ROOT.TH1) and _first:
                    ff = f.clone()
                    for i in ff:
                        v = float(ff[i])
                        ff[i] = VE(v, 0)
                    _functions.append(ff)
                    _first = False
                else:
                    _functions.append(f)

            _functions.reverse()
            functions = _functions

            row.append('+' if merge else '-')
            row.append('%s' % skip)

            ## merge list of functions into single function
            if merge and 1 < len(functions):
                ## single_func = functions[0] * functions [1]
                single_func = MULT(functions[0], functions[1])
                for fun in functions[2:]:
                    ## multiply it
                    ## single_func *= fun
                    single_func = MULT(single_func, fun)
                functions = [single_func]

            self.__vars += [(funname, funval, functions, SE())]
            self.__table.append(row)

    self.__vars = tuple(self.__vars)
def makeWeights(
        dataset,
        plots=[],
        database="weights.db",
        compare=None,      ## comparison function
        delta=0.001,       ## delta for ``mean'' weight variation
        minmax=0.05,       ## delta for ``minmax'' weight variation
        power=0,           ## auto-determination
        debug=True,        ## save intermediate information in DB
        tag="Reweighting"):

    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta''  %s" % (tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (tag, minmax)

    from ostap.logger.colorized import allright, attention, infostr
    from ostap.utils.basic import isatty

    power = power if power >= 1 else len(plots)

    nplots = len(plots)
    if 1 < nplots:
        import math
        fudge_factor = math.sqrt(1.0 / max(2.0, nplots - 1.0))
        delta = delta * fudge_factor
        minmax = minmax * fudge_factor

    save_to_db = []
    ## number of active plots for reweighting
    for wplot in plots:

        what = wplot.what        ## variable/function to plot/compare
        how = wplot.how          ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data      ## original "DATA" object
        hmc0 = wplot.mc_histo    ## original "MC"   histogram
        ww = wplot.w             ## relative weight

        #
        # normalize the data
        #
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1):
            hdata = hdata.density()

        # =====================================================================
        ## make a plot on (MC) data with the weight
        # =====================================================================
        dataset.project(hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("Reweighting: statistic goes to zero %s/``%s''" % (st, address))

        # =====================================================================
        ## normalize MC
        # =====================================================================
        hmc = hmc0.density()

        # =====================================================================
        ## calculate the reweighting factor : a bit conservative (?)
        #  this is the only important line
        # =====================================================================

        #  try to exploit finer binning if/when possible
        if isinstance ( hmc   , ( ROOT.TH1F , ROOT.TH1D ) ) and \
           isinstance ( hdata , ( ROOT.TH1F , ROOT.TH1D ) ) and \
           len ( hmc ) >= len ( hdata ) :
            w = (1.0 / hmc) * hdata  ## NB!
        ## elif isinstance ( hmc   , ( ROOT.TH2F , ROOT.TH2D ) ) and \
        ##      isinstance ( hdata , ( ROOT.TH2F , ROOT.TH2D ) ) and \
        ##      len ( hmc.GetXaxis() ) >= len ( hdata.GetXaxis () ) and \
        ##      len ( hmc.GetYaxis() ) >= len ( hdata.GetYaxis () ) :
        ##     w = ( 1.0 / hmc ) * hdata ## NB!
        ## elif isinstance ( hmc   , ( ROOT.TH3F , ROOT.TH3D ) ) and \
        ##      isinstance ( hdata , ( ROOT.TH3F , ROOT.TH3D ) ) and \
        ##      len ( hmc.GetXaxis() ) >= len ( hdata.GetXaxis () ) and \
        ##      len ( hmc.GetYaxis() ) >= len ( hdata.GetYaxis () ) and \
        ##      len ( hmc.GetZaxis() ) >= len ( hdata.GetZaxis () ) :
        ##     w = ( 1.0 / hmc ) * hdata ## NB!
        else:
            w = hdata / hmc          ## NB!

        # =====================================================================
        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()
        #
        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?
        #
        afunc1 = allright if good1 else attention
        afunc2 = allright if good2 else attention
        #
        message = "%s: %24s:" % (tag, address)
        message += ' ' + 'mean=%12s' % cnt.mean().toString('(%4.2f+-%4.2f)')
        message += ' ' + afunc2('min/max=%-5.3f/%5.3f' % (cnt.minmax()[0], cnt.minmax()[1]))
        message += ' ' + afunc1('rms=%s[%%]' % (wvar * 100).toString('(%4.2f+-%4.2f)'))
        logger.info(message)
        #
        ## make decision based on the variance of weights
        #
        mnw, mxw = cnt.minmax()
        if good:  ## small variance?
            message = "%s: No more reweights for %s" % (tag, address)
            message += ' ' + allright("min/max/rms=%+3.1f/%+3.1f/%3.1f[%%]" %
                                      ((mnw - 1) * 100, (mxw - 1) * 100, 100 * wvar))
            logger.info(message)
            del w, hdata, hmc
        else:
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))

        # =====================================================================
        ## make a comparison (if needed)
        # =====================================================================
        if compare:
            compare(hdata0, hmc0, address)

    ## for single reweighting
    ## if 1 == nplots : power = 1
    ## if power != nplots :
    ##     logger.info ( "%s: ``power'' is %g/#%d" % ( tag , power , nplots ) )

    active = [p[0] for p in save_to_db]
    all = [p.address for p in plots]
    for i, a in enumerate(all):
        if a in active:
            if isatty():
                all[i] = attention(a)
            else:
                all[i] = '*' + a + '*'
        else:
            if isatty():
                all[i] = allright(a)

    logger.info("%s: reweights are: %s" % (tag, (', '.join(all))))

    ## if len ( active ) != nplots :
    ##     if database and save_to_db :
    ##         power += ( nplots - len ( active ) )
    ##         logger.info ("%s: ``power'' is changed to %g" % ( tag , power ) )

    nactive = len(active)

    while database and save_to_db:

        entry = save_to_db.pop()

        address, ww, hd0, hm0, hd, hm, weight = entry

        ## eff_exp = 1.0 / power
        ## eff_exp = 0.95 / ( 1.0 * nactive ) ** 0.5

        cnt = weight.stat()
        mnw, mxw = cnt.minmax()

        if 0.95 < mnw and mxw < 1.05:
            eff_exp = 0.75 if 1 < nactive else 1.50
        elif 0.90 < mnw and mxw < 1.10:
            eff_exp = 0.70 if 1 < nactive else 1.30
        elif 0.80 < mnw and mxw < 1.20:
            eff_exp = 0.65 if 1 < nactive else 1.25
        elif 0.70 < mnw and mxw < 1.30:
            eff_exp = 0.60 if 1 < nactive else 1.15
        elif 0.50 < mnw and mxw < 1.50:
            eff_exp = 0.55 if 1 < nactive else 1.10
        else:
            eff_exp = 0.50 if 1 < nactive else 1.0

        ## print 'effective exponent is:', eff_exp , address , mnw , mxw , (1.0/mnw)*mnw**eff_exp , (1.0/mxw)*mxw**eff_exp

        if 1 < nactive and 1 != ww:
            eff_exp *= ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" % (tag, eff_exp, address))

        if 1 != eff_exp and 0 < eff_exp:
            weight = weight**eff_exp
            ## print 'WEIGHT stat', eff_exp, weight.stat()

        ## hmmmm... needed ? yes!
        #if 1 < power : weight = weight ** ( 1.0 / power ) ## relative importance
        #if 1 != ww :
        #    logger.info ("%s: apply ``relative importance factor'' of %.3g for ``%s''" % ( tag , ww , address ) )
        #    weight = weight ** ww

        with DBASE.open(database) as db:
            db[address] = db.get(address, []) + [weight]
            if debug:
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        del hd0, hm0, hd, hm, weight, entry

    return active
def test_shelves():

    db_sql_name  = CU.CleanUp.tempfile ( suffix = '.sqldb'  )
    db_zip_name  = CU.CleanUp.tempfile ( suffix = '.zipdb'  )
    db_bz2_name  = CU.CleanUp.tempfile ( suffix = '.bz2db'  )
    db_root_name = CU.CleanUp.tempfile ( suffix = '.root'   )
    db_lz_name   = CU.CleanUp.tempfile ( suffix = '.lzmadb' )

    db_sql  = sqliteshelve.open ( db_sql_name  , 'c' )
    db_zip  = zipshelve.open    ( db_zip_name  , 'c' )
    db_bz2  = bz2shelve.open    ( db_bz2_name  , 'c' )
    db_root = rootshelve.open   ( db_root_name , 'c' )
    if lzshelve : db_lz = lzshelve.open ( db_lz_name , 'c' )
    else        : db_lz = None

    for k in data :
        db_sql  [ k ] = data[k]
        db_zip  [ k ] = data[k]
        db_bz2  [ k ] = data[k]
        if lzshelve : db_lz [ k ] = data[k]
        db_root [ k ] = data[k]

    logger.info ( 'SQLiteShelve #keys: %s' % len ( list ( db_sql .keys() ) ) )
    logger.info ( 'ZipShelve    #keys: %s' % len ( db_zip .keys()          ) )
    logger.info ( 'Bz2Shelve    #keys: %s' % len ( db_bz2 .keys()          ) )
    logger.info ( 'RootShelve   #keys: %s' % len ( db_root.keys()          ) )
    if lzshelve :
        logger.info ( 'LzShelve     #keys: %s' % len ( db_lz.keys() ) )

    db_sql .close()
    db_zip .close()
    db_bz2 .close()
    db_root.close()
    if lzshelve : db_lz.close()

    logger.info ( 'SQLiteShelve size: %d|%d ' % dbsize ( db_sql_name  ) )
    logger.info ( 'ZipShelve    size: %d|%d ' % dbsize ( db_zip_name  ) )
    logger.info ( 'Bz2Shelve    size: %d|%d ' % dbsize ( db_bz2_name  ) )
    logger.info ( 'RootShelve   size: %d|%d'  % dbsize ( db_root_name ) )
    if lzshelve :
        logger.info ( 'LzShelve     size: %d|%d ' % dbsize ( db_lz_name ) )

    db_sql  = sqliteshelve.open ( db_sql_name  , 'r' )
    db_zip  = zipshelve.open    ( db_zip_name  , 'r' )
    db_bz2  = bz2shelve.open    ( db_bz2_name  , 'r' )
    if lzshelve : db_lz = lzshelve.open ( db_lz_name , 'r' )
    db_root = rootshelve.open   ( db_root_name , 'r' )

    logger.info ( 'SQLiteShelve #keys: %s' % len ( list ( db_sql .keys() ) ) )
    logger.info ( 'ZipShelve    #keys: %s' % len ( db_zip .keys()          ) )
    logger.info ( 'Bz2Shelve    #keys: %s' % len ( db_bz2 .keys()          ) )
    if lzshelve :
        logger.info ( 'LzShelve     #keys: %s' % len ( db_lz.keys() ) )
    logger.info ( 'RootShelve   #keys: %s' % len ( db_root.keys() ) )

    with timing ( 'h2-read/SQL'  ) : h2_sql  = db_sql  [ 'histo-2D' ]
    with timing ( 'h2_read/ZIP'  ) : h2_zip  = db_zip  [ 'histo-2D' ]
    with timing ( 'h2_read/BZ2'  ) : h2_bz2  = db_bz2  [ 'histo-2D' ]
    if lzshelve :
        with timing ( 'h2_read/LZ' ) : h2_lz = db_lz [ 'histo-2D' ]
    with timing ( 'h2_read/ROOT' ) : h2_root = db_root [ 'histo-2D' ]

    with timing ( 'tu-read/SQL'  ) : tu_sql  = db_sql  [ 'both' ]
    with timing ( 'tu_read/ZIP'  ) : tu_zip  = db_zip  [ 'both' ]
    with timing ( 'tu_read/BZ2'  ) : tu_bz2  = db_bz2  [ 'both' ]
    if lzshelve :
        with timing ( 'tu_read/LZ' ) : tu_lz = db_lz [ 'both' ]
    with timing ( 'tu_read/ROOT' ) : tu_root = db_root [ 'both' ]

    with timing ( 'h1-read/SQL'  ) : h1_sql  = db_sql  [ 'histo-1D' ]
    with timing ( 'h1-read/ZIP'  ) : h1_zip  = db_zip  [ 'histo-1D' ]
    with timing ( 'h1-read/BZ2'  ) : h1_bz2  = db_bz2  [ 'histo-1D' ]
    if lzshelve :
        with timing ( 'h1-read/LZ' ) : h1_lz = db_lz [ 'histo-1D' ]
    with timing ( 'h1-read/ROOT' ) : h1_root = db_root [ 'histo-1D' ]

    for i in h1_sql :
        v = h1_sql [i] - h1_zip [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 1D histogram(1)!' )
        v = h1_sql [i] - h1 [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 1D histogram(2)!' )
        v = h1_root [i] - h1 [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 1D histogram(3)!' )
        v = h1_bz2 [i] - h1 [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 1D histogram(4)!' )
        if lzshelve :
            v = h1_lz [i] - h1 [i]
            if not iszero ( v.value() ) :
                logger.error ( 'Large difference for 1D histogram(5)!' )

    for i in h2_sql :
        v = h2_sql [i] - h2_zip [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 2D histogram(1)!' )
        v = h2_sql [i] - h2 [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 2D histogram(2)!' )
        v = h2_root [i] - h2 [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 2D histogram(3)!' )
        v = h2_bz2 [i] - h2 [i]
        if not iszero ( v.value() ) :
            logger.error ( 'Large difference for 2D histogram(4)!' )
        if lzshelve :
            v = h2_lz [i] - h2 [i]
            if not iszero ( v.value() ) :
                logger.error ( 'Large difference for 2D histogram(5)!' )

    h1tq = tu_sql  [1]
    h1tz = tu_zip  [1]
    h1tr = tu_root [1]

    ## clone them
    dbs = [ db_sql , db_zip , db_bz2 , db_root ]
    if lzshelve : dbs.append ( db_lz )
    for db in dbs :
        cdb = db.clone ( CU.CleanUp.tempfile ( suffix = '.db' ) )
        logger.info ( 'Cloned:' )
        cdb.ls()
    del dbs

    with timing ( 'Close SQL'  ) : db_sql .close()
    with timing ( 'Close ZIP'  ) : db_zip .close()
    with timing ( 'Close BZ2'  ) : db_bz2 .close()
    if lzshelve :
        with timing ( 'Close LZ' ) : db_lz.close()
    with timing ( 'Close ROOT' ) : db_root.close()

    for dbase in ( sqliteshelve . tmpdb () ,
                   zipshelve    . tmpdb () ,
                   bz2shelve    . tmpdb () ,
                   ## lzshelve  . tmpdb () ,
                   rootshelve   . tmpdb () ) :
        with timing () :
            with dbase as db :
                db [ 'h1'     ] = h1
                db [ 'h2'     ] = h2
                db [ 'data'   ] = data
                db [ 'histos' ] = data [ 'histos' ]
                db.ls()
def test_shelves():

    db_sql = sqliteshelve.open(db_sql_name, 'c')
    db_zip = zipshelve.open(db_zip_name, 'c')
    db_root = rootshelve.open(db_root_name, 'c')

    for k in data:
        db_sql[k] = data[k]
        db_zip[k] = data[k]
        db_root[k] = data[k]

    logger.info('SQLiteShelve keys: %s' % list(db_sql.keys()))
    logger.info('ZipShelve    keys: %s' % list(db_zip.keys()))
    logger.info('RootShelve   keys: %s' % list(db_root.keys()))

    db_sql.close()
    db_zip.close()
    db_root.close()

    logger.info('SQLiteShelve size: %d ' % os.path.getsize(db_sql_name))
    logger.info('ZipShelve    size: %d ' % os.path.getsize(db_zip_name))
    logger.info('RootShelve   size: %d ' % os.path.getsize(db_root_name))

    db_sql = sqliteshelve.open(db_sql_name, 'r')
    db_zip = zipshelve.open(db_zip_name, 'r')
    db_root = rootshelve.open(db_root_name, 'r')

    logger.info('SQLiteShelve keys: %s' % list(db_sql.keys()))
    logger.info('ZipShelve    keys: %s' % list(db_zip.keys()))
    logger.info('RootShelve   keys: %s' % list(db_root.keys()))

    with timing('h2-read/SQL'):
        h2_sql = db_sql['histo-2D']
    with timing('h2_read/ZIP'):
        h2_zip = db_zip['histo-2D']
    with timing('h2_read/ROOT'):
        h2_root = db_root['histo-2D']

    with timing('tu-read/SQL'):
        tu_sql = db_sql['both']
    with timing('tu_read/ZIP'):
        tu_zip = db_zip['both']
    with timing('tu_read/ROOT'):
        tu_root = db_root['both']

    with timing('h1-read/SQL'):
        h1_sql = db_sql['histo-1D']
    with timing('h1-read/ZIP'):
        h1_zip = db_zip['histo-1D']
    with timing('h1-read/ROOT'):
        h1_root = db_root['histo-1D']

    for i in h1_sql:
        v = h1_sql[i] - h1_zip[i]
        if not iszero(v.value()):
            logger.error('Large difference for 1D histogram(1)!')
        v = h1_sql[i] - h1[i]
        if not iszero(v.value()):
            logger.error('Large difference for 1D histogram(2)!')
        v = h1_root[i] - h1[i]
        if not iszero(v.value()):
            logger.error('Large difference for 1D histogram(3)!')

    for i in h2_sql:
        v = h2_sql[i] - h2_zip[i]
        if not iszero(v.value()):
            logger.error('Large difference for 2D histogram(1)!')
        v = h2_sql[i] - h2[i]
        if not iszero(v.value()):
            logger.error('Large difference for 2D histogram(2)!')
        v = h2_root[i] - h2[i]
        if not iszero(v.value()):
            logger.error('Large difference for 2D histogram(3)!')

    h1tq = tu_sql[1]
    h1tz = tu_zip[1]
    h1tr = tu_root[1]

    with timing('Close SQL'):
        db_sql.close()
    with timing('Close ZIP'):
        db_zip.close()
    with timing('Close ROOT'):
        db_root.close()

    with timing('Remove SQL'):
        os.remove(db_sql_name)
    with timing('Remove ZIP'):
        os.remove(db_zip_name)
    with timing('Remove ROOT'):
        os.remove(db_root_name)

    for dbase in (sqliteshelve.tmpdb(), zipshelve.tmpdb(), rootshelve.tmpdb()):
        with dbase as db:
            db['h1'] = h1
            db['h2'] = h2
            db.ls()
def makeWeights ( dataset                 ,
                  plots    = []           ,
                  database = "weights.db" ,
                  compare  = None         , ## comparison function
                  delta    = 0.001        , ## delta for weight variance
                  debug    = True         ) : ## save intermediate information in DB

    more = False
    ## loop over plots
    for r in plots :

        what    = r [0]           ## variable/function to plot/compare
        how     = r [1]           ## weight or additional cuts
        address = r [2]           ## address in database
        hdata0  = r [3] .clone () ## original "DATA" histogram
        hmc0    = r [4] if 4 < len ( r ) else hdata0.clone () ## original "MC" histogram

        #
        ## black magic to take into account the difference in bins and normalizations
        #
        hdata = hdata0
        if hasattr ( hdata , 'rescale_bins' ) :
            hdata = hdata.rescale_bins ( 1.0 )

        ## normalize the data:
        hmean = None
        if hasattr ( hdata , 'mean' ) and hasattr ( hdata , '__idiv__' ) :
            ## normalization point
            hmean = hdata.mean()
            #
            if isinstance ( hdata , ROOT.TH2 ) : hdata /= hdata ( *hmean )
            else                               : hdata /= hdata (  hmean )

        #
        ## make a plot on (MC) data with the weight
        #
        dataset.project ( hmc0 , what , how )

        st   = hmc0.stat()
        mnmx = st.minmax()
        if iszero ( mnmx[0] ) :
            logger.warning ( 'Statistic goes to zero %s/"%s"' % ( st , address ) )

        #
        ## black magic to take into account the difference in bins and normalizations
        #
        hmc = hmc0.rescale_bins ( 1.0 )
        if hmean is None : pass
        else :
            if isinstance ( hmc , ROOT.TH2 ) : hmc /= hmc ( *hmean )
            else                             : hmc /= hmc (  hmean )

        #
        ## calculate the reweighting factor : a bit conservative (?)
        power = min ( 2.0 , len ( plots ) ) ## NB!
        #  this is the only important line
        #  try to exploit finer binning if possible
        if len ( hmc ) >= len ( hdata ) :
            w = ( ( 1.0 / hmc ) * hdata ) ** ( 1.0 / power ) ## NB!
        else :
            w = (   hdata / hmc         ) ** ( 1.0 / power ) ## NB!

        #
        ## get the statistics of weights
        #
        cnt  = w.stat()
        mnmx = cnt.minmax()
        if not mnmx [0] <= 1 <= mnmx [1] :
            w  /= cnt.mean().value()
            cnt = w.stat()
        #
        wvar = cnt.rms() / cnt.mean()
        logger.info ( 'Reweighting "%-.15s": Mean/minmax:%s/(%.4f,%.4f) Vars:%s[%%]' %
                      ( address , cnt.mean() , cnt.minmax()[0] , cnt.minmax()[1] , wvar * 100 ) )
        #
        ## make decision based on variance of weights
        #
        if wvar.value() <= delta / len ( plots ) : ## small variance?
            save = False
            logger.info ( "No more reweighting for %s [%.3f%%]" % ( address , wvar * 100 ) )
        else :
            save = True

        #
        ## make a comparison (if needed)
        #
        if compare : compare ( hdata0 , hmc0 , address )

        ## update data base
        if save and database and address :
            with DBASE.open ( database ) as db :
                db [ address ] = db.get ( address , [] ) + [ w ]
                if debug :
                    addr  = address + ':REWEIGHTING'
                    entry = ( hdata0 , hmc0 , hdata , hmc , w )
                    db [ addr ] = db.get ( addr , [] ) + [ entry ]

        ##
        more = more or save

        del hdata0 , hmc0 , hdata , hmc , w

    return more
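# =============================================================================
## Hedged usage sketch for this tuple-based variant (not part of the original code):
#  each plot entry is ( what , how , address , data_histo [ , mc_histo ] ) as
#  unpacked above; ``mc_dataset`` and the histograms are placeholders.
plots = [ ( 'pt' , 'weight' , 'MC:pt' , hdata_pt , hmc_pt ) ]

more  = True
itnum = 0
while more and itnum < 10 : ## cap the number of iterations
    more   = makeWeights ( mc_dataset , plots , 'weights.db' , delta = 0.001 )
    itnum += 1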