def asympt_signif(cls, w, print_level=-1): """Analytically calculate one-sided signal significance for a given in the workspace s+b model using RooStats.AsymptoticCalculator. Parameters ---------- w: RooWorkspace workspace containing data and model to be opened. Usage of the method extract_from_workspace is assumed, so the workspace should contain data, s+b and b modelconfigs. print_level: int, optional (default=-1) verbosity of the output Returns ------- as_result: HypoTestResult results of the test (for printing use Print() method) """ data, mc_sb, mc_b = cls.extract_from_workspace(w) if data.isWeighted(): interactivity_yn( 'It\'s not a good idea to do asymptotic significance calculation with weighted data. Sure you want to proceed?' ) ac = ROOT.RooStats.AsymptoticCalculator(data, mc_sb, mc_b) ac.SetPrintLevel(print_level) ac.SetOneSidedDiscovery(True) as_result = ac.GetHypoTest() as_result.Print() return as_result
def write_to_workspace(self, poi, nuisances): """Create a workspace with the fitted to the data model, poi and nuisance parameters. Parameters ---------- nuisances: list of RooRealVar nuisance parameters in statistical inference poi: RooRealVar parameter of interest in statistical inference Returns ------- w: RooWorkspace """ if not self.is_fitted: interactivity_yn( 'Model was not fitted to data, fit it first. Sure you want to proceed?' ) w = ROOT.RooWorkspace("w", True) Import = getattr(ROOT.RooWorkspace, 'import') # special trick to make things not crush Import(w, self.model) mc = ROOT.RooStats.ModelConfig("ModelConfig", w) mc.SetPdf(w.pdf(self.model.GetName())) mc.SetParametersOfInterest(ROOT.RooArgSet(w.var(poi.GetName()))) mc.SetObservables(ROOT.RooArgSet(w.var(self.var.GetName()))) mc.SetNuisanceParameters( ROOT.RooArgSet(*[w.var(nui.GetName()) for nui in nuisances])) mc.SetSnapshot(ROOT.RooArgSet(w.var(poi.GetName()))) Import(w, mc, 'ModelConfig') Import(w, self.data, 'data') return w
def chi2_fit(self, nbins=-1, fix_float=[], minos=False, minos_poi=None): """Fit the instance data with binned chi2 method using Minuit2. Set is_fitted=True. NB: weights presence is taken care of automatically NB: by default, binning is taken from the variable's definition. Otherwise, it is temporarily set to nbins value. Parameters ---------- nbins: int/float, optional (default=-1: take the number of bins from the variable's definition) number of bins in calculating chi2 fix_float: list of RooRealVar, optional (default=[]) variables from this list will be firstly setConstant(1) in the fit and then setConstant(0) minos: bool whether to calculate MINOS errors for minos_poi parameter of interest minos_poi: RooRealVar parameter of interest for which to calculate MINOS errors Returns ------- fit_results: RooFitResult results of the fit, can be printed with the Print() method """ init_nbins = self.var.numBins() if nbins != -1: assert (nbins % 1 == 0 and nbins >= 0), 'nbins type is not a positive integer' self.var.setBins(nbins) hist_to_fit = ROOT.RooDataHist( 'hist_to_fit', 'hist_to_fit', ROOT.RooArgSet(self.var), self.data) ### binning is taken from the var's definition is_extended = self.model.canBeExtended() chi2_var = ROOT.RooChi2Var("chi2_var", "chi2_var", self.model, hist_to_fit, RF.Extended(is_extended), RF.DataError(ROOT.RooAbsData.Auto)) m = ROOT.RooMinimizer(chi2_var) m.setMinimizerType("Minuit2") m.setPrintLevel(3) m.minimize("Minuit2", "minimize") for param in fix_float: param.setConstant(1) m.minimize("Minuit2", "minimize") for param in fix_float: param.setConstant(0) self.fit_status = m.minimize("Minuit2", "minimize") self.is_fitted = True self.var.setBins(init_nbins) if minos: if minos_poi is None: raise TypeError( 'Poi is None by default: set it to a proper variable to run MINOS.' ) if self.data.isWeighted(): interactivity_yn( 'The data is weighted and MINOS should not be used. Sure you want to proceed?' ) m.minos(ROOT.RooArgSet(minos_poi)) print('\n\n\nMINOS DONE, see the results above\n\n\n') return m.save()
def asympt_signif_ll(cls, w): """Analytically calculate one-sided significance for a given in the workspace s+b model by bare hands (through likelihoods). Might be useful as a cross-check to asympt_signif(). NB: this gives more control on fitting procedure than asympt_signif() method Parameters ---------- w: RooWorkspace workspace containing data and model to be opened. Usage of the method extract_from_workspace is assumed, so the workspace should contain data, s+b and b modelconfigs. Returns ------- dict: dictionary results of the test: p-value, Z-value (signal significance), negative loglikelihood for s+b and b hypotheses respectively """ data, mc_sb, mc_b = cls.extract_from_workspace(w) if data.isWeighted(): interactivity_yn( 'It\'s not a good idea to do asymptotic significance calculation with weighted data. Sure you want to proceed?' ) num_poi = mc_sb.GetParametersOfInterest().getSize() if num_poi != 1: print( f'This implementation works only for one parameter of interest (you have {num_poi}). Either change pois in the workspace or see 1007.1727 paper.' ) return mc_sb.LoadSnapshot() model_sb = mc_sb.GetPdf() DE_sb = cls(label='sb', data=data, model=model_sb) rrr_sig = DE_sb.fit(is_sum_w2=data.isWeighted()) mc_b.LoadSnapshot() mc_b.GetParametersOfInterest().first().setConstant() model_b = mc_b.GetPdf() DE_b = cls(label='b', data=data, model=model_b) rrr_null = DE_b.fit(is_sum_w2=data.isWeighted()) nll_sig = rrr_sig.minNll() nll_null = rrr_null.minNll() P = ROOT.TMath.Prob( 2 * (nll_null - nll_sig), 1 ) ## !!! should be always ndf = 1 = number of poi for this formula to work S = ROOT.TMath.ErfcInverse(P) * sqrt( 2) # this yields same result as AsymptoticCalculator # S = ROOT.Math.gaussian_quantile_c(P, 1) # this is slightly different, might be python precision issues return {'P': P, 'S': S, 'nll_sig': nll_sig, 'nll_null': nll_null}
def fit(self, is_sum_w2=-1, fix=[], fix_float=[], minos=False): """Fit instance data with instance model using fitTo() method. Extended or not is infered from the model. Set is_fitted=True. NB: the corresponding model parameters will be updated outside of the class instance after executing! Parameters ---------- is_sum_w2: bool, optional (default=-1: True if data is weighted and False otherwise) correct Hessian with data weights matrix to get correct errors, see RooFit tutorial rf403__weightedevts for details fix: list of RooRealVar, optional (default=[]) variables from this list will be fixed throughout all the fit and afterwards released fix_float: list of RooRealVar, optional (default=[]) variables from this list during the fit will be firstly setConstant(1) and then setConstant(0) minos: bool whether to calculate MINOS errors (will be done for all the parameters) Returns ------- fit_results: RooFitResult results of the fit, can be printed with the Print() method """ if is_sum_w2 != -1: assert ( type(is_sum_w2) is bool ), 'is_sum_w2 is not boolean, set it to either True or False' is_extended = self.model.canBeExtended() is_sum_w2 = self.data.isWeighted() if is_sum_w2 == -1 else is_sum_w2 for param in fix: param.setConstant(1) self.model.fitTo(self.data, RF.Extended(is_extended), RF.SumW2Error(is_sum_w2)) for param in fix_float: param.setConstant(1) self.model.fitTo(self.data, RF.Extended(is_extended), RF.SumW2Error(is_sum_w2)) for param in fix_float: param.setConstant(0) self.model.fitTo(self.data, RF.Extended(is_extended), RF.SumW2Error(is_sum_w2)) fit_results = self.model.fitTo(self.data, RF.Extended(is_extended), RF.SumW2Error(is_sum_w2), RF.Save()) if minos: if self.data.isWeighted(): interactivity_yn( 'The data is weighted and MINOS should not be used. Sure you want to proceed?' ) fit_results = self.model.fitTo(self.data, RF.Extended(is_extended), RF.SumW2Error(is_sum_w2), RF.Minos(), RF.Save()) print( '\n\n\nMINOS DONE, see the NEGATIVE/POSITIVE columns above\n\n\n' ) if is_sum_w2: print( '\n\n' + 70 * '~' + '\n' + ' ' * 30 + 'BEWARE!\n\nYou set is_sum_w2=True, which means that the data might be weighted.\nErrors might differ between two printed tables!\nThe last one from RooFitResult.Print() should be correct.\nYou might also want to consider chi2_fit() method as a cross-check,\nas in principle, that should give correct and more reliable results\n(but the normalization in this case will likely not be preserved \nand results might be unstable)\n' + 70 * '~' + '\n\n') for param in fix: param.setConstant(0) self.is_fitted = True self.fit_status = fit_results.status() fit_results.Print() return fit_results