def fit_mass(data, column, x, sig_pdf=None, bkg_pdf=None, n_sig=None, n_bkg=None, blind=False, nll_profile=False, second_storage=None, log_plot=False, pulls=True, sPlot=False, bkg_in_region=False, importance=3, plot_importance=3): """Fit a given pdf to a variable distribution Parameter --------- data : |hepds_type| The data containing the variable to fit to column : str The name of the column to fit the pdf to sig_pdf : RooFit pdf The signal Probability Density Function. The variable to fit to has to be named 'x'. bkg_pdf : RooFit pdf The background Probability Density Function. The variable to fit to has to be named 'x'. n_sig : None or numeric The number of signals in the data. If it should be fitted, use None. n_bkg : None or numeric The number of background events in the data. If it should be fitted, use None. blind : boolean or tuple(numberic, numberic) If False, the data is fitted. If a tuple is provided, the values are used as the lower (the first value) and the upper (the second value) limit of a blinding region, which will be omitted in plots. Additionally, no true number of signal will be returned but only fake. nll_profile : boolean If True, a Negative Log-Likelihood Profile will be generated. Does not work with blind fits. second_storage : |hepds_type| A second data-storage that will be concatenated with the first one. importance : |importance_type| |importance_docstring| plot_importance : |plot_importance_type| |plot_importance_docstring| Return ------ tuple(numerical, numerical) Return the number of signals and the number of backgrounds in the signal-region. If a blind fit is performed, the signal will be a fake number. If no number of background events is required, -999 will be returned. """ if not (isinstance(column, str) or len(column) == 1): raise ValueError("Fitting to several columns " + str(column) + " not supported.") if type(sig_pdf) == type(bkg_pdf) == None: raise ValueError("sig_pdf and bkg_pdf are both None-> no fit possible") if blind is not False: lower_blind, upper_blind = blind blind = True n_bkg_below_sig = -999 # create data data_name = data.name data_array, _t1, _t2 = data.make_dataset(second_storage, columns=column) del _t1, _t2 # double crystalball variables min_x, max_x = min(data_array[column]), max(data_array[column]) # x = RooRealVar("x", "x variable", min_x, max_x) # create data data_array = np.array([i[0] for i in data_array.as_matrix()]) data_array.dtype = [('x', np.float64)] tree1 = array2tree(data_array, "x") data = RooDataSet("data", "Data", RooArgSet(x), RooFit.Import(tree1)) # # TODO: export somewhere? does not need to be defined inside... # mean = RooRealVar("mean", "Mean of Double CB PDF", 5280, 5100, 5600)#, 5300, 5500) # sigma = RooRealVar("sigma", "Sigma of Double CB PDF", 40, 0.001, 200) # alpha_0 = RooRealVar("alpha_0", "alpha_0 of one side", 5.715)#, 0, 150) # alpha_1 = RooRealVar("alpha_1", "alpha_1 of other side", -4.019)#, -200, 0.) # lambda_0 = RooRealVar("lambda_0", "Exponent of one side", 3.42)#, 0, 150) # lambda_1 = RooRealVar("lambda_1", "Exponent of other side", 3.7914)#, 0, 500) # # # TODO: export somewhere? pdf construction # frac = RooRealVar("frac", "Fraction of crystal ball pdfs", 0.479, 0.01, 0.99) # # crystalball1 = RooCBShape("crystallball1", "First CrystalBall PDF", x, # mean, sigma, alpha_0, lambda_0) # crystalball2 = RooCBShape("crystallball2", "Second CrystalBall PDF", x, # mean, sigma, alpha_1, lambda_1) # doubleCB = RooAddPdf("doubleCB", "Double CrystalBall PDF", # crystalball1, crystalball2, frac) # n_sig = RooRealVar("n_sig", "Number of signals events", 10000, 0, 1000000) # test input if n_sig == n_bkg == 0: raise ValueError("n_sig as well as n_bkg is 0...") if n_bkg is None: n_bkg = RooRealVar("n_bkg", "Number of background events", 10000, 0, 500000) elif n_bkg >= 0: n_bkg = RooRealVar("n_bkg", "Number of background events", int(n_bkg)) else: raise ValueError("n_bkg is not >= 0 or None") if n_sig is None: n_sig = RooRealVar("n_sig", "Number of signal events", 1050, 0, 200000) # START BLINDING blind_cat = RooCategory("blind_cat", "blind state category") blind_cat.defineType("unblind", 0) blind_cat.defineType("blind", 1) if blind: blind_cat.setLabel("blind") blind_n_sig = RooUnblindPrecision("blind_n_sig", "blind number of signals", "wasistdas", n_sig.getVal(), 10000, n_sig, blind_cat) else: # blind_cat.setLabel("unblind") blind_n_sig = n_sig print "n_sig value " + str(n_sig.getVal()) # raw_input("blind value " + str(blind_n_sig.getVal())) # n_sig = blind_n_sig # END BLINDING elif n_sig >= 0: n_sig = RooRealVar("n_sig", "Number of signal events", int(n_sig)) else: raise ValueError("n_sig is not >= 0") # if not blind: # blind_n_sig = n_sig # # create bkg-pdf # lambda_exp = RooRealVar("lambda_exp", "lambda exp pdf bkg", -0.00025, -1., 1.) # bkg_pdf = RooExponential("bkg_pdf", "Background PDF exp", x, lambda_exp) if blind: comb_pdf = RooAddPdf("comb_pdf", "Combined DoubleCB and bkg PDF", RooArgList(sig_pdf, bkg_pdf), RooArgList(blind_n_sig, n_bkg)) else: comb_pdf = RooAddPdf("comb_pdf", "Combined DoubleCB and bkg PDF", RooArgList(sig_pdf, bkg_pdf), RooArgList(n_sig, n_bkg)) # create test dataset # mean_gauss = RooRealVar("mean_gauss", "Mean of Gaussian", 5553, -10000, 10000) # sigma_gauss = RooRealVar("sigma_gauss", "Width of Gaussian", 20, 0.0001, 300) # gauss1 = RooGaussian("gauss1", "Gaussian test dist", x, mean_gauss, sigma_gauss) # lambda_data = RooRealVar("lambda_data", "lambda exp data", -.002) # exp_data = RooExponential("exp_data", "data example exp", x, lambda_data) # frac_data = RooRealVar("frac_data", "Fraction PDF of data", 0.15) # # data_pdf = RooAddPdf("data_pdf", "Data PDF", gauss1, exp_data, frac_data) # data = data_pdf.generate(RooArgSet(x), 30000) # data.printValue() # xframe = x.frame() # data_pdf.plotOn(xframe) # print "n_cpu:", meta_config.get_n_cpu() # input("test") # comb_pdf.fitTo(data, RooFit.Extended(ROOT.kTRUE), RooFit.NumCPU(meta_config.get_n_cpu())) # HACK to get 8 cores in testing c5 = TCanvas("c5", "RooFit pdf not fit vs " + data_name) c5.cd() x_frame1 = x.frame() # data.plotOn(x_frame1) # comb_pdf.pdfList()[1].plotOn(x_frame1) if __name__ == "__main__": n_cpu = 8 else: n_cpu = meta_config.get_n_cpu() print "n_cpu = ", n_cpu # HACK # n_cpu = 8 result_fit = comb_pdf.fitTo(data, RooFit.Minos(ROOT.kTRUE), RooFit.Extended(ROOT.kTRUE), RooFit.NumCPU(n_cpu)) # HACK end if bkg_in_region: x.setRange("signal", bkg_in_region[0], bkg_in_region[1]) bkg_pdf_fitted = comb_pdf.pdfList()[1] int_argset = RooArgSet(x) # int_argset = x # int_argset.setRange("signal", bkg_in_region[0], bkg_in_region[1]) integral = bkg_pdf_fitted.createIntegral(int_argset, RooFit.NormSet(int_argset), RooFit.Range("signal")) bkg_cdf = bkg_pdf_fitted.createCdf(int_argset, RooFit.Range("signal")) bkg_cdf.plotOn(x_frame1) # integral.plotOn(x_frame1) n_bkg_below_sig = integral.getVal(int_argset) * n_bkg.getVal() x_frame1.Draw() if plot_importance >= 3: c2 = TCanvas("c2", "RooFit pdf fit vs " + data_name) c2.cd() x_frame = x.frame() # if log_plot: # c2.SetLogy() # x_frame.SetTitle("RooFit pdf vs " + data_name) x_frame.SetTitle(data_name) if pulls: pad_data = ROOT.TPad("pad_data", "Pad with data and fit", 0, 0.33, 1, 1) pad_pulls = ROOT.TPad("pad_pulls", "Pad with data and fit", 0, 0, 1, 0.33) pad_data.SetBottomMargin(0.00001) pad_data.SetBorderMode(0) if log_plot: pad_data.SetLogy() pad_pulls.SetTopMargin(0.00001) pad_pulls.SetBottomMargin(0.2) pad_pulls.SetBorderMode(0) pad_data.Draw() pad_pulls.Draw() pad_data.cd() else: if log_plot: c2.SetLogy() if blind: # HACK column = 'x' # END HACK x.setRange("lower", min_x, lower_blind) x.setRange("upper", upper_blind, max_x) range_str = "lower,upper" lower_cut_str = str( min_x) + "<=" + column + "&&" + column + "<=" + str(lower_blind) upper_cut_str = str( upper_blind) + "<=" + column + "&&" + column + "<=" + str(max_x) sideband_cut_str = "(" + lower_cut_str + ")" + "||" + "(" + upper_cut_str + ")" n_entries = data.reduce( sideband_cut_str).numEntries() / data.numEntries() # raw_input("n_entries: " + str(n_entries)) if plot_importance >= 3: data.plotOn(x_frame, RooFit.CutRange(range_str), RooFit.NormRange(range_str)) comb_pdf.plotOn( x_frame, RooFit.Range(range_str), RooFit.Normalization(n_entries, RooAbsReal.Relative), RooFit.NormRange(range_str)) if pulls: # pull_hist(pull_frame=x_frame, pad_data=pad_data, pad_pulls=pad_pulls) x_frame_pullhist = x_frame.pullHist() else: if plot_importance >= 3: data.plotOn(x_frame) comb_pdf.plotOn(x_frame) if pulls: pad_pulls.cd() x_frame_pullhist = x_frame.pullHist() pad_data.cd() comb_pdf.plotOn(x_frame, RooFit.Components(sig_pdf.namePtr().GetName()), RooFit.LineStyle(ROOT.kDashed)) comb_pdf.plotOn(x_frame, RooFit.Components(bkg_pdf.namePtr().GetName()), RooFit.LineStyle(ROOT.kDotted)) # comb_pdf.plotPull(n_sig) if plot_importance >= 3: x_frame.Draw() if pulls: pad_pulls.cd() x_frame.SetTitleSize(0.05, 'Y') x_frame.SetTitleOffset(0.7, 'Y') x_frame.SetLabelSize(0.04, 'Y') # c11 = TCanvas("c11", "RooFit\ pulls" + data_name) # c11.cd() # frame_tmp = x_frame frame_tmp = x.frame() # frame_tmp.SetTitle("significance") frame_tmp.SetTitle("Roofit\ pulls\ " + data_name) frame_tmp.addObject(x_frame_pullhist) frame_tmp.SetMinimum(-5) frame_tmp.SetMaximum(5) # frame_tmp.GetYaxis().SetTitle("significance") frame_tmp.GetYaxis().SetNdivisions(5) frame_tmp.SetTitleSize(0.1, 'X') frame_tmp.SetTitleOffset(1, 'X') frame_tmp.SetLabelSize(0.1, 'X') frame_tmp.SetTitleSize(0.1, 'Y') frame_tmp.SetTitleOffset(0.5, 'Y') frame_tmp.SetLabelSize(0.1, 'Y') frame_tmp.Draw() # raw_input("") if not blind and nll_profile: # nll_range = RooRealVar("nll_range", "Signal for nLL", n_sig.getVal(), # -10, 2 * n_sig.getVal()) sframe = n_sig.frame(RooFit.Bins(20), RooFit.Range(1, 1000)) # HACK for best n_cpu lnL = comb_pdf.createNLL(data, RooFit.NumCPU(8)) # HACK end lnProfileL = lnL.createProfile(ROOT.RooArgSet(n_sig)) lnProfileL.plotOn(sframe, RooFit.ShiftToZero()) c4 = TCanvas("c4", "NLL Profile") c4.cd() # input("press ENTER to show plot") sframe.Draw() if plot_importance >= 3: pass params = comb_pdf.getVariables() params.Print("v") # print bkg_cdf.getVal() if sPlot: sPlotData = ROOT.RooStats.SPlot( "sPlotData", "sPlotData", data, # variable fitted to, RooDataSet comb_pdf, # fitted pdf ROOT.RooArgList( n_sig, n_bkg, # NSigB0s )) sweights = np.array([ sPlotData.GetSWeight(i, 'n_sig') for i in range(data.numEntries()) ]) return n_sig.getVal(), n_bkg_below_sig, sweights if blind: return blind_n_sig.getVal(), n_bkg_below_sig, comb_pdf else: return n_sig.getVal(), n_bkg_below_sig, comb_pdf
def rooFit202(): print ">>> setup model component: gaussian signals and Chebychev polynomial background..." x = RooRealVar("x", "x", 0, 10) mean = RooRealVar("mean", "mean of gaussian", 5) sigma1 = RooRealVar("sigma1", "width of gaussian", 0.5) sigma2 = RooRealVar("sigma2", "width of gaussian", 1.0) sig1 = RooGaussian("sig1", "Signal component 1", x, mean, sigma1) sig2 = RooGaussian("sig2", "Signal component 2", x, mean, sigma2) a0 = RooRealVar("a0", "a0", 0.5, 0., 1.) a1 = RooRealVar("a1", "a1", -0.2, 0., 1.) bkg = RooChebychev("bkg", "Background", x, RooArgList(a0, a1)) # Sum the signal components into a composite signal p.d.f. sig1frac = RooRealVar("sig1frac", "fraction of component 1 in signal", 0.8, 0., 1.) sig = RooAddPdf("sig", "Signal", RooArgList(sig1, sig2), RooArgList(sig1frac)) print ">>>\n>>> METHOD 1" print ">>> construct extended composite model..." # Sum the composite signal and background into an extended pdf nsig*sig+nbkg*bkg nsig = RooRealVar("nsig", "number of signal events", 500, 0., 10000) nbkg = RooRealVar("nbkg", "number of background events", 500, 0, 10000) model = RooAddPdf("model", "(g1+g2)+a", RooArgList(bkg, sig), RooArgList(nbkg, nsig)) print ">>> sample, fit and plot extended model...\n" # Generate a data sample of expected number events in x from model # nsig + nbkg = model.expectedEvents() # NOTE: since the model predicts a specific number events, one can # omit the requested number of events to be generated # Introduce Poisson fluctuation with Extended(kTRUE) data = model.generate(RooArgSet(x), Extended(kTRUE)) # RooDataSet # Fit model to data, extended ML term automatically included # NOTE: Composite extended pdfs can only be successfully fit if the extended likelihood # term -log(Poisson(Nobs,Nexp)) is included in the minimization because they have # one extra degree of freedom in their parameterization that is constrained by # this extended term. If a pdf is capable of calculating an extended term (i.e. # any extended RooAddPdf), the extended term is AUTOMATICALLY included in the # likelihood calculation. Override this behaviour with Extended(): # Extended(kTRUE) ADD extended likelihood term # Extended(kFALSE) DO NOT ADD extended likelihood #model.fitTo(data,Extended(kTRUE)) model.fitTo(data) print "\n>>> plot data, model and model components..." # Plot data and PDF overlaid, use expected number of events for pdf projection # normalization, rather than observed number of events, data.numEntries() frame1 = x.frame(Title("extended ML fit example")) # RooPlot data.plotOn(frame1, Binning(30), Name("data")) model.plotOn(frame1, Normalization(1.0, RooAbsReal.RelativeExpected), Name("model")) # Overlay the background components of model # NOTE: By default, the pdf is normalized to event count of the last dataset added # to the plot frame. Use "RelativeExpected" to normalize to the expected # event count of the pdf instead argset1 = RooArgSet(bkg) argset2 = RooArgSet(sig1) argset3 = RooArgSet(sig2) argset4 = RooArgSet(bkg, sig2) model.plotOn(frame1, Components(argset1), LineStyle(kDashed), LineColor(kBlue), Normalization(1.0, RooAbsReal.RelativeExpected), Name("bkg")) #model.plotOn(frame1,Components(argset1),LineStyle(kDashed),LineColor(kBlue), Name("bkg2")) model.plotOn(frame1, Components(argset2), LineStyle(kDotted), LineColor(kMagenta), Normalization(1.0, RooAbsReal.RelativeExpected), Name("sig1")) model.plotOn(frame1, Components(argset3), LineStyle(kDotted), LineColor(kPink), Normalization(1.0, RooAbsReal.RelativeExpected), Name("sig2")) model.plotOn(frame1, Components(argset4), LineStyle(kDashed), LineColor(kAzure - 4), Normalization(1.0, RooAbsReal.RelativeExpected), Name("bkgsig2")) print "\n>>> structure of composite pdf:" model.Print("t") # "tree" mode print "\n>>> parameters:" params = model.getVariables() # RooArgSet params.Print("v") params.Print() print "\n>>> params.find(\"...\").getVal():" print ">>> sigma1 = %.2f" % params.find("sigma1").getVal() print ">>> sigma2 = %.2f" % params.find("sigma2").getVal() print ">>> nsig = %6.2f, sig1frac = %5.2f" % ( params.find("nsig").getVal(), params.find("sig1frac").getVal()) print ">>> nbkg = %6.2f" % params.find("nbkg").getVal() print ">>>\n>>> components:" comps = model.getComponents() # RooArgSet sig = comps.find("sig") # RooAbsArg sigVars = sig.getVariables() # RooArgSet sigVars.Print() print ">>>\n>>> METHOD 2" print ">>> construct extended components first..." # Associated nsig/nbkg as expected number of events with sig/bkg nsig = RooRealVar("nsig", "number of signal events", 500, 0., 10000) nbkg = RooRealVar("nbkg", "number of background events", 500, 0, 10000) esig = RooExtendPdf("esig", "extended signal pdf", sig, nsig) ebkg = RooExtendPdf("ebkg", "extended background pdf", bkg, nbkg) print ">>> sum extended components without coefficients..." # Construct sum of two extended p.d.f. (no coefficients required) model2 = RooAddPdf("model2", "(g1+g2)+a", RooArgList(ebkg, esig)) # METHOD 2 is functionally completely equivalent to METHOD 1. # Its advantage is that the yield parameter is associated to the shape pdf # directly, while in METHOD 1 the association is made after constructing # a RooAddPdf. Also, class RooExtendPdf offers extra functionality to # interpret event counts in a different range. print ">>> plot model..." model2.plotOn(frame1, LineStyle(kDashed), LineColor(kRed), Normalization(1.0, RooAbsReal.RelativeExpected), Name("model2")) print ">>> draw on canvas..." canvas = TCanvas("canvas", "canvas", 100, 100, 800, 600) legend = TLegend(0.2, 0.85, 0.4, 0.65) legend.SetTextSize(0.032) legend.SetBorderSize(0) legend.SetFillStyle(0) gPad.SetLeftMargin(0.14) gPad.SetRightMargin(0.02) frame1.GetYaxis().SetLabelOffset(0.008) frame1.GetYaxis().SetTitleOffset(1.4) frame1.GetYaxis().SetTitleSize(0.045) frame1.GetXaxis().SetTitleSize(0.045) frame1.Draw() legend.AddEntry("data", "data", 'LEP') legend.AddEntry("model", "composite model", 'L') legend.AddEntry("model2", "composite model (method 2)", 'L') legend.AddEntry("bkg", "background only", 'L') #legend.AddEntry("bkg2", "background only (no extended norm)", 'L') legend.AddEntry("sig1", "signal 1", 'L') legend.AddEntry("sig2", "signal 2", 'L') legend.AddEntry("bkgsig2", "background + signal 2", 'L') legend.Draw() canvas.SaveAs("rooFit202.png")
def rooFit201(): print ">>> setup model component: gaussian signals and Chebychev polynomial background..." x = RooRealVar("x","x",0,11) mean = RooRealVar("mean","mean of gaussians",5) sigma1 = RooRealVar("sigma1","width of gaussians",0.5) sigma2 = RooRealVar("sigma2","width of gaussians",1) sig1 = RooGaussian("sig1","Signal component 1",x,mean,sigma1) sig2 = RooGaussian("sig2","Signal component 2",x,mean,sigma2) a0 = RooRealVar("a0","a0",0.5,0.,1.) a1 = RooRealVar("a1","a1",-0.2,0.,1.) bkg = RooChebychev("bkg","Background",x,RooArgList(a0,a1)) print ">>>\n>>> METHOD 1 - Two RooAddPdfs" print ">>> add signal components..." # Sum the signal components into a composite signal p.d.f. sig1frac = RooRealVar("sig1frac","fraction of component 1 in signal",0.8,0.,1.) sig = RooAddPdf("sig","Signal",RooArgList(sig1,sig2),RooArgList(sig1frac)) print ">>> add signal and background..." # Sum the composite signal and background bkgfrac = RooRealVar("bkgfrac","fraction of background",0.5,0.,1.) model = RooAddPdf("model","g1+g2+a",RooArgList(bkg,sig),RooArgList(bkgfrac)) print ">>> sample, fit and plot model..." data = model.generate(RooArgSet(x),1000) # RooDataSet model.fitTo(data) frame1 = x.frame(Title("Example of composite pdf=(sig1+sig2)+bkg")) # RooPlot data.plotOn(frame1,Binning(50),Name("data")) model.plotOn(frame1,Name("model")) # Overlay the background component of model with a dashed line argset1 = RooArgSet(bkg) model.plotOn(frame1,Components(argset1),LineWidth(2),Name("bkg")) #,LineStyle(kDashed) # Overlay the background+sig2 components of model with a dotted line argset2 = RooArgSet(bkg,sig2) model.plotOn(frame1,Components(argset2),LineWidth(2),LineStyle(kDashed),LineColor(kAzure-4),Name("bkgsig2")) #,LineStyle(kDotted) print "\n>>> structure of composite pdf:" model.Print("t") # "tree" mode print "\n>>> parameters:" params = model.getVariables() # RooArgSet params.Print("v") params.Print() print "\n>>> params.find(\"...\").getVal():" print ">>> sigma1 = %.2f" % params.find("sigma1").getVal() print ">>> sigma2 = %.2f" % params.find("sigma2").getVal() print ">>> bkgfrac = %5.2f" % params.find("bkgfrac").getVal() print ">>> sig1frac = %5.2f" % params.find("sig1frac").getVal() print ">>>\n>>> components:" comps = model.getComponents() # RooArgSet sig = comps.find("sig") # RooAbsArg sigVars = sig.getVariables() # RooArgSet sigVars.Print() print ">>>\n>>> METHOD 2 - One RooAddPdf with recursive fractions" # Construct sum of models on one go using recursive fraction interpretations # model2 = bkg + (sig1 + sig2) model2 = RooAddPdf("model","g1+g2+a",RooArgList(bkg,sig1,sig2),RooArgList(bkgfrac,sig1frac),kTRUE) # NB: Each coefficient is interpreted as the fraction of the # left-hand component of the i-th recursive sum, i.e. # sum4 = A + ( B + ( C + D ) ) # with fraction fA, fB and fC expands to # sum4 = fA*A + (1-fA)*(fB*B + (1-fB)*(fC*C + (1-fC)*D)) print ">>> plot recursive addition model..." argset3 = RooArgSet(bkg,sig2) model2.plotOn(frame1,LineColor(kRed),LineStyle(kDashDotted),LineWidth(3),Name("model2")) model2.plotOn(frame1,Components(argset3),LineColor(kMagenta),LineStyle(kDashDotted),LineWidth(3),Name("bkgsig22")) model2.Print("t") print ">>> draw pdfs and fits on canvas..." canvas = TCanvas("canvas","canvas",100,100,800,600) legend = TLegend(0.57,0.87,0.95,0.65) legend.SetTextSize(0.030) legend.SetBorderSize(0) legend.SetFillStyle(0) gPad.SetLeftMargin(0.14); gPad.SetRightMargin(0.02) frame1.GetYaxis().SetLabelOffset(0.008) frame1.GetYaxis().SetTitleOffset(1.4) frame1.GetYaxis().SetTitleSize(0.045) frame1.GetXaxis().SetTitleSize(0.045) frame1.Draw() legend.AddEntry("data", "data", 'LEP') legend.AddEntry("model", "composite model", 'L') legend.AddEntry("model2", "composite model (method 2)", 'L') legend.AddEntry("bkg", "background only", 'L') legend.AddEntry("bkgsig2", "background + signal 2", 'L') legend.AddEntry("bkgsig22","background + signal 2 (method 2)",'L') legend.Draw() canvas.SaveAs("rooFit201.png")