def process(self, jobid, nToys):
    """Run `nToys` pseudoexperiments for the subjob `jobid`.

    The toys are produced by `ostap.fitting.toys.make_toys`, configured
    from the attributes stored on this task. The `(results, stats)` pair
    it returns is stored in `self.the_output`.

    - jobid : subjob identifier, used only in the assertion message
    - nToys : number of pseudoexperiments, must be a positive integer

    Returns the value of `self.results()`.
    Raises AssertionError if `nToys` is not a positive integer.
    """
    import ROOT
    from ostap.logger.logger import logWarning

    ## the decorator modules are imported inside the `logWarning` context
    with logWarning():
        import ostap.core.pyrouts
        import ostap.fitting.roofit
        import ostap.fitting.dataset
        import ostap.fitting.roofitresult
        import ostap.fitting.variables

    from ostap.core.ostap_types import integer_types

    assert isinstance(nToys, integer_types) and 0 < nToys,\
        'Jobid %s: Invalid "nToys" argument %s/%s' % (jobid, nToys, type(nToys))

    import ostap.fitting.toys as Toys

    ## everything but the number of toys is taken from the task itself
    task_attributes = ('pdf', 'data', 'gen_config', 'fit_config',
                       'init_pars', 'more_vars', 'gen_fun', 'fit_fun',
                       'accept_fun', 'silent', 'progress')
    settings = {name: getattr(self, name) for name in task_attributes}

    results, stats = Toys.make_toys(nToys=nToys, **settings)

    self.the_output = results, stats

    return self.results()
def test_toys():
    """Toy study checking for fit bias and correct uncertainty evaluation

    - generate pseudoexperiments with the PDF `gen_gauss`
    - fit each pseudoexperiment with the same PDF
    - log the statistics returned by `Toys.make_toys`
    - fill, draw and dump histograms of the fitted mean and sigma
    """

    logger = getLogger('test_toys')

    generation = {'nEvents': 200, 'sample': True}
    fitting = {'silent': True, 'refit': 5}
    start_values = {'mean_GG': 0.4, 'sigma_GG': 0.1}

    results, stats = Toys.make_toys(pdf=gen_gauss,
                                    nToys=1000,
                                    data=[mass],
                                    gen_config=generation,
                                    fit_config=fitting,
                                    init_pars=start_values,
                                    silent=True,
                                    progress=True)

    for name in stats:
        logger.info("Toys: %-20s : %s" % (name, stats[name]))

    ## one histogram per fitted parameter
    h_mean = ROOT.TH1F('h1', 'mean of Gauss ', 100, 0, 0.80)
    h_sigma = ROOT.TH1F('h2', 'sigma of Gauss', 100, 0.05, 0.15)

    histos = (('mean_GG', h_mean), ('sigma_GG', h_sigma))

    for key, histo in histos:
        for value in results[key]:
            histo.Fill(value)

    for _, histo in histos:
        histo.draw()
        logger.info("%s :\n%s" % (histo.GetTitle(), histo.dump(30, 10)))
        time.sleep(1)
def test_toys_simfit_1():
    """Toy study for a simultaneous fit of two samples

    - build two datasets sharing one mass variable, each a Gaussian
      signal on top of a flat background
    - fit each dataset separately, then fit both simultaneously
    - run pseudoexperiments with the simultaneous model
    """

    ## the observable shared by both samples
    mass = ROOT.RooRealVar('test_mass', 'Some test mass', 0, 5)

    ## first dataset
    varset1 = ROOT.RooArgSet(mass)
    dataset1 = ROOT.RooDataSet(dsID(), 'Test Data set-1', varset1)

    ## second dataset
    varset2 = ROOT.RooArgSet(mass)
    dataset2 = ROOT.RooDataSet(dsID(), 'Test Data set-2', varset2)

    def fill(dataset, varset, draw, entries):
        """Draw `entries` values and store those accepted by `value in mass`"""
        for _ in range(entries):
            value = draw()
            if value in mass:
                mass.setVal(value)
                dataset.add(varset)

    ## sample 1: 1000 signal and 250 background candidates
    mean1 = 2.0
    sigma1 = 0.50
    NS1 = 1000
    NB1 = 250

    fill(dataset1, varset1, lambda: random.gauss(mean1, sigma1), NS1)
    fill(dataset1, varset1, lambda: random.uniform(0, 5), NB1)

    ## sample 2: 250 signal and 1000 background candidates,
    #  the peak is shifted by +1.0 and is twice narrower
    NS2 = 250
    NB2 = 1000
    shifted_mean = mean1 + 1.0
    narrow_sigma = sigma1 * 0.5

    fill(dataset2, varset2, lambda: random.gauss(shifted_mean, narrow_sigma), NS2)
    fill(dataset2, varset2, lambda: random.uniform(0, 5), NB2)

    ## model for the first sample
    signal1 = Models.Gauss_pdf('G1',
                               xvar=mass,
                               mean=(0.5, 2.5),
                               sigma=(0.1, 1.0))

    model1 = Models.Fit1D(suffix='M1', signal=signal1, background=-1)
    model1.S = NS1
    model1.B = NB1

    ## model for the second sample: mean and sigma are derived from
    #  the parameters of the first signal, the background is shared
    mean2 = signal1.vars_add(signal1.mean, 1.0)
    sigma2 = signal1.vars_multiply(signal1.sigma, 0.5)

    signal2 = Models.Gauss_pdf('G2', xvar=mass, mean=mean2, sigma=sigma2)

    model2 = Models.Fit1D(suffix='M2',
                          signal=signal2,
                          background=model1.background)
    model2.S = NS2
    model2.B = NB2

    # =========================================================================
    ## separate fits of the two samples
    r1, f1 = model1.fitTo(dataset1, draw=True, nbins=50, silent=True)
    r2, f2 = model2.fitTo(dataset2, draw=True, nbins=50, silent=True)
    # =========================================================================

    ## category used to label the samples
    sample = ROOT.RooCategory('sample', 'sample', 'A', 'B')

    ## combined dataset
    from ostap.fitting.simfit import combined_data
    observables = ROOT.RooArgSet(mass)
    dataset = combined_data(sample, observables, {
        'A': dataset1,
        'B': dataset2
    })

    ## combined PDF
    model_sim = Models.SimFit(sample, {'A': model1, 'B': model2}, name='X')

    # =========================================================================
    ## the simultaneous fit is performed twice
    for _ in range(2):
        r, f = model_sim.fitTo(dataset, silent=True)

    fA = model_sim.draw('A', dataset, nbins=50)
    fB = model_sim.draw('B', dataset, nbins=50)

    logger.info('Fit results are: %s ' % r.table(prefix="# "))

    ## pseudoexperiments with the simultaneous model
    generation = {'nEvents': (NS1 + NB1, NS2 + NB2), 'sample': True}
    start_values = {
        'mean_G1': mean1,
        'BM1': NB1,
        'BM2': NB2,
        'sigma_G1': sigma1,
        'SM1': NS1,
        'SM2': NS2,
        'phi0_Bkg_FitG1_M1': 0
    }

    results, stats = Toys.make_toys(pdf=model_sim,
                                    nToys=100,
                                    data=[mass],
                                    gen_config=generation,
                                    fit_config={'silent': True},
                                    init_pars=start_values,
                                    silent=True,
                                    progress=True)
def parallel_toys(
        pdf,
        nToys,  ## total number of toys
        nSplit,  ## split into <code>nSplit</code> subjobs
        data,  ## template for dataset/variables
        gen_config,  ## parameters for <code>pdf.generate</code>
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        init_pars={},
        more_vars={},
        gen_fun=None,  ## generator function ( pdf , varset , **config )
        fit_fun=None,  ## fit function ( pdf , dataset , **config )
        accept_fun=None,  ## accept function ( fit-result, pdf, dataset )
        silent=True,
        progress=False,
        **kwargs):
    """Make `nToys` pseudoexperiments, splitting them into `nSplit` subjobs
    to be executed in parallel

    - Schematically:
    >>> for toy in range ( nToys )  :
    >>> ...  dataset = gen_fun ( pdf , ...     , **gen_config )
    >>> ...  result  = fit_fun ( pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result , pdf , dataset ) : continue
    >>> .... < collect statistics here >

    For each experiment:

    1. generate dataset using `pdf` with variables specified
    in `data` and configuration specified via `gen_config`;
    for each generation the parameters of `pdf` are reset
    to their initial values and the values from `init_pars`

    2. fit generated dataset with `pdf` using configuration
    specified via `fit_config`

    - pdf        PDF to be used for generation and fitting
    - nToys      total number of pseudoexperiments to generate
    - nSplit     split total number of pseudoexperiments into `nSplit` subjobs
    - data       dataset, or list of variables/variable names,
                 to be used for dataset generation
    - gen_config configuration of <code>pdf.generate</code>,
                 must contain the key 'nEvents'
    - fit_config configuration of <code>pdf.fitTo</code>
    - init_pars  redefine these parameters for each pseudoexperiment
    - more_vars  dictionary of functions to define the additional results
    - gen_fun    generator function
    - fit_fun    fitting function
    - accept_fun accept function
    - silent     silent toys?
    - progress   show progress bar?
    - kwargs     forwarded to `WorkManager`

    It returns a dictionary with fit results for the toys
    and a dictionary of statistics

    >>> pdf = ...
    ... results, stats = parallel_toys (
    ...    pdf                  , ## PDF  to use
    ...    1000                 , ## number of toys
    ...    10                   , ## number of subjobs
    ...    [ 'mass' ]           , ## variables in dataset
    ...    { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...    { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...    { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation
    ...    )

    Derived parameters can be also retrieved via <code>more_vars</code> argument:
    >>> ratio     = lambda res,pdf : res.ratio('x','y')
    >>> more_vars = { 'Ratio' : ratio }
    >>> r, s = parallel_toys ( .... , more_vars = more_vars , ... )

    Parallelization is controlled by two arguments
    (passed through `kwargs` to `WorkManager`)
    - `ncpus` :  number of local cpus to use, default is `'autodetect'`,
    that means all local processors
    - `ppservers`:  list of servers to be used (for parallel python)

    - If `gen_fun`    is not specified `generate_data` is used
    - If `fit_fun`    is not specified `make_fit`      is used
    - If `accept_fun` is not specified `accept_fit`    is used

    Raises AssertionError for missing 'nEvents' in `gen_config` or for
    non-positive/non-integer `nToys` or `nSplit`, and TypeError for an
    unusable entry in `data`.
    """
    from ostap.core.ostap_types import integer_types

    assert gen_config and 'nEvents' in gen_config,\
        'Number of events per toy must be specified via "gen_config" %s' % gen_config

    ## NB: there is no job identifier at this level, so the messages
    #  must not refer to one (a failing check would raise NameError)
    assert isinstance(nToys, integer_types) and 0 < nToys,\
        'Invalid "nToys" argument %s/%s' % (nToys, type(nToys))

    assert isinstance(nSplit, integer_types) and 0 < nSplit,\
        'Invalid "nSplit" argument %s/%s' % (nSplit, type(nSplit))

    import ostap.fitting.toys as Toys

    ## nothing to parallelize: run all toys sequentially
    if 1 == nSplit:
        return Toys.make_toys(pdf=pdf,
                              nToys=nToys,
                              data=data,
                              gen_config=gen_config,
                              fit_config=fit_config,
                              init_pars=init_pars,
                              more_vars=more_vars,
                              gen_fun=gen_fun,
                              fit_fun=fit_fun,
                              accept_fun=accept_fun,
                              silent=silent,
                              progress=progress)

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult

    params = pdf.params()

    ## the task receives the variables by name
    toy_data = []
    if isinstance(data, ROOT.RooAbsData):
        toy_data = [v.GetName() for v in data.varset()]
    else:
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                toy_data.append(v.GetName())
            elif isinstance(v, string_types) and v in params:
                toy_data.append(v)
            else:
                raise TypeError("Invalid type of variable %s/%s" %
                                (v, type(v)))

    toy_init_pars = Toys.vars_transform(init_pars)

    # ========================================================================

    ## number of toys per subjob and the remainder
    if nToys <= nSplit:
        ## fewer toys than requested subjobs: one toy per subjob
        nToy = 1
        nSplit = nToys
        nRest = 0
    else:
        nToy, nRest = divmod(nToys, nSplit)

    task = ToysTask(pdf=pdf,
                    data=toy_data,
                    gen_config=gen_config,
                    fit_config=fit_config,
                    init_pars=toy_init_pars,
                    more_vars=more_vars,
                    gen_fun=gen_fun,
                    fit_fun=fit_fun,
                    accept_fun=accept_fun,
                    silent=silent,
                    progress=progress)

    wmgr = WorkManager(silent=False, **kwargs)

    ## `nSplit` subjobs of `nToy` toys each, plus one extra subjob
    #  for the remainder, so that the total is exactly `nToys`
    jobs = nSplit * [nToy]
    if nRest:
        jobs.append(nRest)

    wmgr.process(task, jobs)

    results, stats = task.results()
    Toys.print_stats(stats, nToys)

    return results, stats