def test_toys2():
    """Perform toys-study for possible fit bias and correct uncertainty evaluation

    - generate `nToys` pseudoexperiments with some PDF `gen_pdf`
    - fit each experiment with the PDF `fit_pdf`
    - store fit results
    - fill distributions of fit results

    The function takes no arguments and returns nothing: the outcome is
    reported through the logger and the drawn histograms.
    """
    logger = getLogger('test_toys2')

    results, stats = Toys.make_toys2(
        gen_pdf=gen_gauss,
        fit_pdf=fit_gauss,
        nToys=1000,
        data=[mass],
        gen_config={'nEvents': 200, 'sample': True},
        fit_config={'silent': True},
        gen_pars={'mean_GG': 0.4, 'sigma_GG': 0.1},
        ## initial values for the fit: the keys carry the same `_FG` suffix
        ## as the fit results read back below (the former `_GF` spelling
        ## matched no parameter name used anywhere in this test)
        fit_pars={'mean_FG': 0.4, 'sigma_FG': 0.1},
        silent=True,
        progress=True)

    ## report the per-parameter statistics returned by the toy machinery
    for p in stats:
        logger.info("Toys: %-20s : %s" % (p, stats[p]))

    ## make histos
    h_mean = ROOT.TH1F(hID(), 'mean of Gauss ', 50, 0, 0.80)
    h_sigma = ROOT.TH1F(hID(), 'sigma of Gauss', 50, 0.05, 0.15)

    for r in results['mean_FG']:
        h_mean.Fill(r)
    for r in results['sigma_FG']:
        h_sigma.Fill(r)

    ## draw and dump each distribution, pausing so the plot stays visible
    for h in (h_mean, h_sigma):
        h.draw()
        logger.info("%s :\n%s" % (h.GetTitle(), h.dump(30, 10)))
        time.sleep(1)
def test_significance_toys():
    """Toy study for the significance of the signal

    - generate `nToys` pseudoexperiments using the background-only hypothesis
    - fit each experiment with the signal+background hypothesis
    - store fit results
    - fill distributions for fit results
    """
    logger = getLogger('test_significance_toys')

    ## background-only hypothesis: used to generate the pseudoexperiments
    bkg_only = Models.Bkg_pdf("BKG", xvar=mass, power=0, tau=0)

    ## signal component with fixed mean and width
    signal = Models.Gauss_pdf('S', xvar=mass, mean=0.5, sigma=0.1)
    signal.mean.fix(0.4)
    signal.sigma.fix(0.1)

    ## signal + background hypothesis: used to fit the pseudoexperiments
    model = Models.Fit1D(signal=signal, background=1)
    model.background.tau.fix(0)

    generation_setup = {'nEvents': 100, 'sample': True}
    generation_values = {'tau_BKG': 0.}  ## initial values for generation
    fit_values = {'B': 100, 'S': 10, 'phi0_Bkg_S': 0.0}  ## initial fit values for parameters

    results, stats = Toys.make_toys2(
        gen_pdf=bkg_only,
        fit_pdf=model,
        nToys=1000,
        data=[mass],
        gen_config=generation_setup,
        fit_config={'silent': True},
        gen_pars=generation_values,
        fit_pars=fit_values,
        silent=True,
        progress=True)

    for key in stats:
        summary = "Toys: %-20s : %s" % (key, stats[key])
        logger.info(summary)

    ## distribution of the fitted signal yield
    h_S = ROOT.TH1F(hID(), '#S', 60, 0, 60)
    for value in results['S']:
        h_S.Fill(value)

    ## single histogram: draw it, dump it, and pause so the plot stays visible
    h_S.draw()
    logger.info("%s :\n%s" % (h_S.GetTitle(), h_S.dump(30, 10)))
    time.sleep(1)
def test_parallel_toys():
    """Toy study (parallel version) for possible fit bias and uncertainty evaluation

    - generate `nToys` pseudoexperiments with some PDF `pdf`
    - fit each experiment with the same PDF
    - store fit results
    - calculate statistics of pulls
    - fill distributions of fit results
    - fill distributions of pulls
    """
    start_values = {'mean_GG': 0.4, 'sigma_GG': 0.1}

    results, stats = Toys.parallel_toys(
        pdf=gen_gauss,
        nToys=1000,
        nSplit=20,
        data=[mass],
        gen_config={'nEvents': 200},
        fit_config={'silent': True},
        init_pars=start_values,
        silent=True,
        progress=False)

    ## NB: the logger used here is the one from the enclosing module scope
    for key in stats:
        summary = "Toys: %-20s : %s" % (key, stats[key])
        logger.info(summary)

    ## make histos:
    h_mean = ROOT.TH1F(hID(), 'mean of Gauss ', 100, 0, 0.80)
    h_sigma = ROOT.TH1F(hID(), 'sigma of Gauss', 100, 0.05, 0.15)

    ## fill each histogram from the matching column of the results
    for histo, column in ((h_mean, 'mean_GG'), (h_sigma, 'sigma_GG')):
        for value in results[column]:
            histo.Fill(value)

    ## draw and dump each distribution, pausing so the plot stays visible
    for histo in (h_mean, h_sigma):
        histo.draw()
        logger.info("%s :\n%s" % (histo.GetTitle(), histo.dump(30, 10)))
        time.sleep(1)
def test_frame1():
    """Draw and project the variable `b1` via both the tree and the frame helpers

    The test is skipped (with a warning) for ROOT versions below 6.16.
    """
    logger = getLogger('test_frame1')

    ## guard clause: nothing to do for too old ROOT
    if root_info < (6, 16):
        logger.warning("Test is disabled for this version of ROOT %s" % str(root_info))
        return

    frame = DataFrame(tname, fname)
    tree = Tree(name=tname, file=fname).chain

    ## exercise the `draw` interface of both objects
    h_tree = tree.draw('b1', '1/b1')
    h_frame = frame.draw('b1', '1/b1')

    ## fresh histograms with explicit binning replace the drawn ones
    h_tree = ROOT.TH1D(hID(), '', 100, 0, 1000)
    h_frame = ROOT.TH1D(hID(), '', 100, 0, 1000)

    ## fill them: the first via the tree method, the second via `frame_project`
    tree.project(h_tree, 'b1', '1.0/b1')
    frame_project(tree, h_frame, 'b1', '2.0/b1')

    with wait(3), use_canvas('test_frame1'):
        h_tree.red()
        h_frame.blue()
        h_tree.draw()
        h_frame.draw('same hist')
## reload data data = Data('DATA_tree', testdata) mc = Data(tag_mc, testdata) wsum = mc.chain.sumVar('w') wvar = '%d*w/%s' % (len(data.chain), wsum.value()) nn = '%s' % (len(data.chain) * 1.0 / len(mc.chain)) for phi in vrange(0, 2 * math.pi, 10): dvar = '%.5f*x+%.5f*y' % (math.cos(phi), math.sin(phi)) mn, mx = data.chain.statVar(dvar).minmax() h1 = ROOT.TH1D(hID(), '', 100, *axis_range(mn, mx, delta=0.05)) h2 = h1.clone() h3 = h1.clone() data.chain.project(h1, dvar) ## data mc.chain.project(h2, dvar, nn) ## original (non-weighted) MC mc.chain.project(h3, dvar, wvar) ## weighted MC mn, mx = h1.minmax() mn, mx = axis_range(0, mx, delta=0.7) h1.SetMaximum(mx) h1.blue() h2.green() h3.red() h1.draw('')
def test_gbreweight():
    """Reweight the MC sample to data with `Reweighter` and compare projections

    - train the reweighter on the `x`, `y` variables of MC (original) and data (target)
    - store the new weights in the MC chain as branch `w`
    - for a set of directions in the (x, y) plane compare data,
      non-weighted MC and weighted MC distributions
    """
    logger = getLogger("test_gbreweight")

    ## the tool is optional: give up (with an error message) if it is unavailable
    try:
        from ostap.tools.reweighter import Reweighter
        rw = Reweighter()
    except ImportError:
        logger.error('GBReweighter is not available!')
        return

    ## create the input file on demand
    if not os.path.exists(testdata):
        with timing("Prepare input data", logger=logger):
            prepare_data()

    # =========================================================================
    ## Input data/mc samples
    # =========================================================================
    data = Data('DATA_tree', testdata)
    mc = Data(tag_mc, testdata)

    ## the second element of each pair is not used by this test
    ddata, _wdata = data.chain.slice('x y', transpose=True)
    dmc, _wmc = mc.chain.slice('x y', transpose=True)

    ## train BDT
    rw.reweight(original=dmc, target=ddata)

    ## new weights
    wnew = rw.weight(original=dmc)
    mc.chain.add_new_branch('w', wnew)

    ## reload data
    data = Data('DATA_tree', testdata)
    mc = Data(tag_mc, testdata)

    ## weight expressions for the weighted and the non-weighted MC projections
    wsum = mc.chain.sumVar('w')
    n_data = len(data.chain)
    n_mc = len(mc.chain)
    wvar = '%d*w/%s' % (n_data, wsum.value())
    nn = '%s' % (n_data * 1.0 / n_mc)

    for phi in vrange(0, 2 * math.pi, 10):

        ## linear combination of x and y along the direction `phi`
        dvar = '%.5f*x+%.5f*y' % (math.cos(phi), math.sin(phi))

        low, high = data.chain.statVar(dvar).minmax()

        h_data = ROOT.TH1D(hID(), '', 100, *axis_range(low, high, delta=0.05))
        h_mc = h_data.clone()
        h_wmc = h_data.clone()

        data.chain.project(h_data, dvar)  ## data
        mc.chain.project(h_mc, dvar, nn)  ## original (non-weighted) MC
        mc.chain.project(h_wmc, dvar, wvar)  ## weighted MC

        ## vertical range: from zero to the (padded) maximum of the data histogram
        _, top = h_data.minmax()
        _, top = axis_range(0, top, delta=0.7)
        h_data.SetMaximum(top)

        h_data.blue()
        h_mc.green()
        h_wmc.red()

        h_data.draw('')
        h_mc.draw('same hist')
        h_wmc.draw('same')

        ## keep the plot visible for a while
        time.sleep(2)
def ex_func(particle, dataset, plots=None, verbose=False):
    """The example of the actual function that builds histos

    In this example it builds two histograms:
    - accepted events
    - rejected events

    For dataset structure and variable names see:
    https://twiki.cern.ch/twiki/bin/view/LHCb/PIDCalibPackage

    The efficiency histogram can be later built in Ostap as:
    >>> h_acc = ...
    >>> h_rej = ...
    >>> eff = 1/(1 + h_rej/h_acc)

    Parameters:
    - particle : not used by this example implementation
    - dataset  : the dataset to process; it must provide the variables
                 `Pi_P` and `Pi_Eta` and the method `fillHistogram`
    - plots    : previously accumulated `[accepted, rejected]` histograms,
                 or an empty/None value for the first call
    - verbose  : if true, log the statistics of the resulting histograms

    Returns the list `[accepted, rejected]`: freshly created histograms when
    `plots` is empty, otherwise `plots` with the new content added in place.
    """

    ## we need ROOT and Ostap machinery!
    import ROOT
    from ostap.core.pyrouts import hID

    # 1) define PID-cut and its negation
    #    For dataset structure and variable names see:
    #    https://twiki.cern.ch/twiki/bin/view/LHCb/PIDCalibPackage
    accepted = 'Pi_ProbNNpi>0.5'  ## note variable names
    rejected = 'Pi_ProbNNpi<0.5'  ## note variable names

    # 2) prepare the histograms
    #    (Sumw2 is called on the histograms just created: the former
    #     `h1`/`h2` names were never defined in this function)
    hA = ROOT.TH2D(hID(), 'Accepted(%s)' % accepted, 15, 0, 150000, 10, 2, 5)
    hA.Sumw2()
    hR = ROOT.TH2D(hID(), 'Rejected(%s)' % rejected, 15, 0, 150000, 10, 2, 5)
    hR.Sumw2()

    # 3) fill the histograms with 'accepted' and 'rejected' events
    #    For dataset structure and variable names see:
    #    https://twiki.cern.ch/twiki/bin/view/LHCb/PIDCalibPackage
    vlst = ROOT.RooArgList()
    vlst.add(dataset.Pi_P)  ## note variable names
    vlst.add(dataset.Pi_Eta)  ## note variable name

    ## fill from the dataset passed as argument
    ## (the former `DataSet` name was never defined in this function)
    hA = dataset.fillHistogram(hA, vlst, accepted)  ## fill histo
    hR = dataset.fillHistogram(hR, vlst, rejected)  ## fill histo

    #
    ## and now update the output
    #
    if not plots:
        ## first call: the new histograms become the accumulators
        hA.SetName(hA.GetTitle())
        hR.SetName(hR.GetTitle())
        plots = [hA, hR]  ## "Accepted" & "Rejected" histograms
    else:
        ## subsequent calls: accumulate and drop the temporary histograms
        plots[0] += hA  ## "Accepted" histogram
        plots[1] += hR  ## "Rejected" histogram
        hA.Delete()
        hR.Delete()
        if hA:
            del hA
        if hR:
            del hR

    if verbose:
        logger.info('Accepted histo: %s' % plots[0].stat())
        logger.info('Rejected histo: %s' % plots[1].stat())

    return plots
def __init__(
        self,
        category,  ## accessor to category
        N,  ## number of categories
        methods,  ## list of TMVA methods
        variables,  ## list of variables
        signal,  ## signal tree
        background,  ## background tree
        signal_cuts='',  ## signal cuts
        background_cuts='',  ## background cuts
        spectators=[],
        bookingoptions="Transformations=I;D;P;G,D",
        configuration="nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V",
        signal_weight=None,
        background_weight=None,
        name='TMVAChopper',  ## the name
        verbose=False,  ## verbose ?
        chop_signal=False,  ## chop the signal ?
        chop_background=True):  ## chop the background ?
    """Create TMVA ``chopping'' trainer

    >>> N = 11
    >>> trainer = Trainer (
    ... category = '137*evt+813*run' ,
    ... N        = N                 ,
    ... methods  = [ # type name configuration
    ...   ( ROOT.TMVA.Types.kMLP        , 'MLP'        , 'H:!V:EstimatorType=CE:VarTransform=N:NCycles=200:HiddenLayers=N+3:TestRate=5:!UseRegulator' ) ,
    ...   ( ROOT.TMVA.Types.kBDT        , 'BDTG'       , 'H:!V:NTrees=100:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2' ) ,
    ...   ( ROOT.TMVA.Types.kCuts       , 'Cuts'       , 'H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart' ) ,
    ...   ( ROOT.TMVA.Types.kFisher     , 'Fisher'     , 'H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10' ),
    ...   ( ROOT.TMVA.Types.kLikelihood , 'Likelihood' , 'H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50' ) ] ,
    ... variables  = [ 'var1' , 'var2' , 'var3' ] , ## Variables to use in the training
    ... signal     = signal_tree     , ## TTree/TChain with ``signal'' sample
    ... background = background_tree , ## TTree/TChain with ``background'' sample
    ... name       = 'TMVAChopper'   ,
    ... verbose    = False )

    The constructor only stores the configuration, checks it and inspects
    the population of the categories; the bookkeeping of trainers and
    output files is initialised empty.
    """

    ## at least two categories are required
    assert isinstance(N, (int, long)) and 1 < N, "Invalid number of categories"

    ## at least one of the samples has to be chopped
    self.__chop_signal = bool(chop_signal)
    self.__chop_background = bool(chop_background)
    assert self.__chop_signal or self.__chop_background, "Neither signal nor background chopping"

    ## chopping configuration and the input samples
    self.__category = category
    self.__N = N
    self.__signal = signal
    self.__background = background

    self.__methods = tuple(methods)

    ## weights and cuts for both samples
    self.__signal_weight = signal_weight
    self.__signal_cuts = ROOT.TCut(signal_cuts)
    self.__background_weight = background_weight
    self.__background_cuts = ROOT.TCut(background_cuts)

    ## TMVA configuration
    self.__variables = tuple(variables)
    self.__spectators = tuple(spectators)
    self.__bookingoptions = bookingoptions
    self.__configuration = configuration

    self.__name = name
    self.__verbose = bool(verbose)

    ## control histograms with the category population (empty by default)
    self.__sig_histos = ()
    self.__bkg_histos = ()

    ## the expression that gives the category index
    cat = '(%s)%%%d' % (self.category, self.N)

    if self.chop_signal:
        sig_fine = ROOT.TH1F(hID(), 'Signal categories', self.N * 5, -0.5, self.N - 1)
        sig_edges = [-0.5 + i for i in range(self.N + 1)]
        sig_coarse = h1_axis(sig_edges, title=sig_fine.GetTitle())
        self.signal.project(sig_fine, cat, self.signal_cuts)
        self.signal.project(sig_coarse, cat, self.signal_cuts)
        self.__sig_histos = sig_fine, sig_coarse
        ## the second histogram is built with one bin per category
        sig_stat = sig_coarse.stat()
        if sig_stat.min() <= 0:
            logger.warning("Some signal categories are empty!")
        logger.info('Signal category population mean/rms: %s/%s' % (sig_stat.mean(), sig_stat.rms()))

    if self.chop_background:
        bkg_fine = ROOT.TH1F(hID(), 'Background categories', self.N * 5, -0.5, self.N - 1)
        bkg_edges = [-0.5 + i for i in range(self.N + 1)]
        bkg_coarse = h1_axis(bkg_edges, title=bkg_fine.GetTitle())
        self.background.project(bkg_fine, cat, self.background_cuts)
        self.background.project(bkg_coarse, cat, self.background_cuts)
        self.__bkg_histos = bkg_fine, bkg_coarse
        ## the second histogram is built with one bin per category
        bkg_stat = bkg_coarse.stat()
        if bkg_stat.min() <= 0:
            logger.warning("Some background categories are empty!")
        logger.info('Background category population mean/rms: %s/%s' % (bkg_stat.mean(), bkg_stat.rms()))

    ## re-wrap the input samples as `Chain` objects, only after the
    ## projections above (original note: trick to please Kisa)
    from ostap.trees.trees import Chain
    self.__signal = Chain(signal)
    self.__background = Chain(background)

    ## book the trainers
    self.__trainers = ()
    self.__weights_files = []
    self.__class_files = []
    self.__output_files = []
    self.__tar_file = None
    self.__log_file = None