def process(self, jobid, nToys):
    """Run ``nToys`` toy experiments for one (sub)job and collect the output.

    - jobid : identifier of this subjob (used only in diagnostics)
    - nToys : positive number of pseudoexperiments to run in this subjob

    Returns whatever ``self.results()`` yields after the toys are stored
    in ``self.the_output``.
    """
    import ROOT
    from ostap.logger.logger import logWarning

    # silence the noisy warnings ROOT/ostap emit on first import
    with logWarning():
        import ostap.core.pyrouts
        import ostap.fitting.roofit
        import ostap.fitting.dataset
        import ostap.fitting.roofitresult
        import ostap.fitting.variables

    from ostap.core.ostap_types import integer_types
    assert isinstance ( nToys , integer_types ) and 0 < nToys,\
        'Jobid %s: Invalid "nToys" argument %s/%s' % ( jobid , nToys , type ( nToys ) )

    import ostap.fitting.toys as Toys

    # forward the task configuration stored on this object to the toy machinery
    toy_config = dict(
        gen_pdf    = self.gen_pdf,
        fit_pdf    = self.fit_pdf,
        nToys      = nToys,
        data       = self.data,
        gen_config = self.gen_config,
        fit_config = self.fit_config,
        gen_pars   = self.gen_pars,
        fit_pars   = self.fit_pars,
        more_vars  = self.more_vars,
        gen_fun    = self.gen_fun,
        fit_fun    = self.fit_fun,
        accept_fun = self.accept_fun,
        silent     = self.silent,
        progress   = self.progress,
    )
    fit_results, fit_stats = Toys.make_toys2(**toy_config)

    self.the_output = fit_results, fit_stats
    return self.results()
def test_significance_toys():
    """Perform toy-study for significance of the signal
    - generate `nToys` pseudoexperiments using background-only hypothesis
    - fit each experiment with signal+background hypothesis
    - store fit results
    - fill distributions for fit results
    """
    logger = getLogger('test_significance_toys')

    ## background-only hypothesis used for generation
    bkg_only = Models.Bkg_pdf("BKG", xvar=mass, power=0, tau=0)

    ## signal component with fixed shape
    signal = Models.Gauss_pdf('S', xvar=mass, mean=0.5, sigma=0.1)
    signal.mean.fix(0.4)
    signal.sigma.fix(0.1)

    ## signal + background hypothesis used for fitting
    model = Models.Fit1D(signal=signal, background=1)
    model.background.tau.fix(0)

    results, stats = Toys.make_toys2(
        gen_pdf=bkg_only,
        fit_pdf=model,
        nToys=1000,
        data=[mass],
        gen_config={'nEvents': 100, 'sample': True},
        fit_config={'silent': True},
        gen_pars={'tau_BKG': 0.},  ## initial values for generation
        fit_pars={'B': 100, 'S': 10, 'phi0_Bkg_S': 0.0},  ## initial fit values for parameters
        silent=True,
        progress=True)

    for name, value in stats.items():
        logger.info("Toys: %-20s : %s" % (name, value))

    ## histogram of the fitted signal yield
    h_S = ROOT.TH1F(hID(), '#S', 60, 0, 60)
    for entry in results['S']:
        h_S.Fill(entry)

    for histo in (h_S,):
        histo.draw()
        logger.info("%s :\n%s" % (histo.GetTitle(), histo.dump(30, 10)))
        time.sleep(1)
def test_toys2():
    """Perform toys-study for possible fit bias and correct uncertainty evaluation
    - generate `nToys` pseudoexperiments with some PDF `gen_pdf`
    - fit each experiment with the PDF `fit_pdf`
    - store fit results
    - fill distributions of fit results
    """
    logger = getLogger('test_toys2')

    results, stats = Toys.make_toys2(
        gen_pdf=gen_gauss,
        fit_pdf=fit_gauss,
        nToys=1000,
        data=[mass],
        gen_config={'nEvents': 200, 'sample': True},
        fit_config={'silent': True},
        ## generation parameters belong to the generation pdf ('GG')
        gen_pars={'mean_GG': 0.4, 'sigma_GG': 0.1},
        ## FIX: keys must match the fit pdf ('FG') parameters; the previous
        ## 'mean_GF'/'sigma_GF' spelling did not match any fit parameter and
        ## was silently ignored, while results below are keyed by 'mean_FG'
        fit_pars={'mean_FG': 0.4, 'sigma_FG': 0.1},
        silent=True,
        progress=True)

    for p in stats:
        logger.info("Toys: %-20s : %s" % (p, stats[p]))

    ## make histos
    h_mean = ROOT.TH1F(hID(), 'mean of Gauss ', 50, 0, 0.80)
    h_sigma = ROOT.TH1F(hID(), 'sigma of Gauss', 50, 0.05, 0.15)

    for r in results['mean_FG']:
        h_mean.Fill(r)
    for r in results['sigma_FG']:
        h_sigma.Fill(r)

    for h in (h_mean, h_sigma):
        h.draw()
        logger.info("%s :\n%s" % (h.GetTitle(), h.dump(30, 10)))
        time.sleep(1)
def parallel_toys2(
        gen_pdf,            ## PDF to generate toys
        fit_pdf,            ## PDF to fit toys
        nToys,              ## total number of toys
        nSplit,             ## split into <code>nSplit</code> subjobs
        data,               ## template for dataset/variables
        gen_config,         ## parameters for <code>pdf.generate</code>
        fit_config={},      ## parameters for <code>pdf.fitTo</code>
        gen_pars={},
        fit_pars={},
        more_vars={},
        gen_fun=None,       ## generator function ( pdf , varset , **gen_config )
        fit_fun=None,       ## fit function ( pdf , dataset , **fit_config )
        accept_fun=None,    ## accept function ( fit-result, pdf, dataset )
        silent=True,
        progress=False,
        **kwargs):
    """Make `nToys` pseudoexperiments, splitting them into `nSplit` subjobs
    to be executed in parallel

    - Schematically:
    >>> for toy in range ( nToys ) :
    >>> ...  dataset = gen_fun ( gen_pdf , ... , **gen_config )
    >>> ...  result  = fit_fun ( fit_pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result , fit_pdf , dataset ) : continue
    >>> .... < collect statistics here >

    For each experiment:
    1. generate dataset using `pdf` with variables specified
    in `data` and configuration specified via `gen_config`
    for each generation the parameters of `pdf` are reset
    for their initial values and values from `init_pars`
    2. fit generated dataset with `pdf` using configuration
    specified via `fit_config`

    - gen_pdf     PDF to be used for generation
    - fit_pdf     PDF to be used for fitting
    - nToys       total number of pseudoexperiments to generate
    - nSplit      split total number of pseudoexperiments into `nSplit` subjobs
    - data        variable list of variables to be used for dataset generation
    - gen_config  configuration of <code>pdf.generate</code>
    - fit_config  configuration of <code>pdf.fitTo</code>
    - gen_pars    redefine these parameters for generation of each pseudoexperiment
    - fit_pars    redefine these parameters for fitting of each pseudoexperiment
    - more_vars   dictionary of functions to define the additional results
    - silent      silent toys?
    - progress    show progress bar?

    It returns a dictionary with fit results for the toys and a dictionary
    of statistics

    >>> pdf = ...
    ... results, stats = parallel_toys2 (
    ...    gen_pdf    = gen_pdf    , ## PDF to generate toys
    ...    fit_pdf    = gen_pdf    , ## PDF to fit toys
    ...    nToys      = 100000     , ## total number of toys
    ...    nSplit     = 100        , ## split them into `nSplit` subjobs
    ...    data       = [ 'mass' ] , ## variables in dataset
    ...    gen_config = { 'nEvents' : 5000  } , ## configuration of `pdf.generate`
    ...    fit_config = { 'ncpus'   : 2     } , ## configuration of `pdf.fitTo`
    ...    gen_pars   = { 'mean'  : 0.0 , 'sigma'  : 1.0 } ## parameters to use for generation
    ...    fit_pars   = { 'meanG' : 0.0 , 'sigmaG' : 1.0 } ## parameters to use for fitting
    ...   )

    Derived parameters can be also retrieved via <code>more_vars</code> argument:
    >>> ratio     = lambda res,pdf : res.ratio('x','y')
    >>> more_vars = { 'Ratio' : ratio }
    >>> r, s = parallel_toys2 ( .... , more_vars = more_vars , ... )

    Parallelization is controlled by two arguments
    - `ncpus`     : number of local cpus to use, default is `'autodetect'`,
    that means all local processors
    - `ppservers` : list of servers to be used (for parallel python)
    """
    ## FIX: also import string_types — it is used below for the variable-name
    ## check and was not brought into scope by this function before
    from ostap.core.ostap_types import integer_types, string_types

    assert gen_config and 'nEvents' in gen_config,\
        'Number of events per toy must be specified via "gen_config" %s' % gen_config
    ## FIX: the failure messages previously referenced an undefined name
    ## `jobid` (copy-paste from the task's `process` method), turning a
    ## failed assertion into a NameError
    assert isinstance ( nToys , integer_types ) and 0 < nToys ,\
        'Invalid "nToys" argument %s/%s' % ( nToys , type ( nToys ) )
    assert isinstance ( nSplit , integer_types ) and 0 < nSplit ,\
        'Invalid "nSplit" argument %s/%s' % ( nSplit , type ( nSplit ) )

    import ostap.fitting.toys as Toys

    ## no parallelization requested: run everything locally in one go
    if 1 == nSplit:
        return Toys.make_toys2(gen_pdf=gen_pdf,
                               fit_pdf=fit_pdf,
                               nToys=nToys,
                               data=data,
                               gen_config=gen_config,
                               fit_config=fit_config,
                               gen_pars=gen_pars,
                               fit_pars=fit_pars,
                               more_vars=more_vars,
                               gen_fun=gen_fun,
                               fit_fun=fit_fun,
                               accept_fun=accept_fun,
                               silent=silent,
                               progress=progress)

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult

    params = gen_pdf.params()

    ## translate `data` into a plain list of variable names so the task can
    ## be pickled and shipped to the workers
    toy_data = []
    if isinstance(data, ROOT.RooAbsData):
        varset = data.varset()
        for v in varset:
            toy_data.append(v.GetName())
    else:
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                toy_data.append(v.GetName())
            elif isinstance(v, string_types) and v in params:
                toy_data.append(v)
            else:
                raise TypeError("Invalid type of variable %s/%s" % (v, type(v)))

    gen_init_pars = Toys.vars_transform(gen_pars)
    fit_init_pars = Toys.vars_transform(fit_pars)

    # ========================================================================
    ## decide how many toys each subjob runs
    if nToys <= nSplit:
        nToy = 1
        nSplit = nToys
        nRest = 0
    else:
        nToy, nRest = divmod(nToys, nSplit)

    task = ToysTask2(gen_pdf=gen_pdf,
                     fit_pdf=fit_pdf,
                     data=toy_data,
                     gen_config=gen_config,
                     fit_config=fit_config,
                     gen_pars=gen_init_pars,
                     fit_pars=fit_init_pars,
                     more_vars=more_vars,
                     gen_fun=gen_fun,
                     fit_fun=fit_fun,
                     accept_fun=accept_fun,
                     silent=silent,
                     progress=progress)

    wmgr = WorkManager(silent=False, **kwargs)

    ## one entry per subjob: its number of toys (+ the remainder, if any)
    data = nSplit * [nToy]
    if nRest:
        data.append(nRest)

    wmgr.process(task, data)

    results, stats = task.results()
    Toys.print_stats(stats, nToys)

    return results, stats