Esempio n. 1
0
    def process(self, jobid, nToys):
        """Run one batch of `nToys` pseudoexperiments for the subjob `jobid`.

        - jobid : identifier of the subjob; used here only in the error message
        - nToys : number of pseudoexperiments in this batch (positive integer)

        The toys are produced by `ostap.fitting.toys.make_toys`, configured
        from the attributes of this task.  The resulting pair
        `(results, stats)` is stored in `self.the_output` and the value of
        `self.results()` is returned.

        Raises `AssertionError` if `nToys` is not a positive integer.
        """

        import ROOT
        from ostap.logger.logger import logWarning

        ## these modules are imported only for their import-time side effects
        ## (presumably the decoration of ROOT/RooFit classes - to be confirmed);
        ## the imports are done inside the `logWarning` context
        with logWarning():
            import ostap.core.pyrouts
            import ostap.fitting.roofit
            import ostap.fitting.dataset
            import ostap.fitting.roofitresult
            import ostap.fitting.variables

        from ostap.core.ostap_types import integer_types
        assert isinstance ( nToys , integer_types ) and 0 < nToys,\
               'Jobid %s: Invalid "nToys" argument %s/%s' % ( jobid , nToys , type ( nToys ) )

        import ostap.fitting.toys as Toys
        run_toys = Toys.make_toys

        ## full configuration of the toys, taken from the task itself
        toys_setup = dict(
            pdf=self.pdf,
            nToys=nToys,
            data=self.data,
            gen_config=self.gen_config,
            fit_config=self.fit_config,
            init_pars=self.init_pars,
            more_vars=self.more_vars,
            gen_fun=self.gen_fun,
            fit_fun=self.fit_fun,
            accept_fun=self.accept_fun,
            silent=self.silent,
            progress=self.progress,
        )

        toy_results, toy_stats = run_toys(**toys_setup)

        ## keep the outcome of this batch on the task
        self.the_output = toy_results, toy_stats

        return self.results()
Esempio n. 2
0
def test_toys():
    """Toy study for a possible fit bias and for correct uncertainty evaluation.

    - run 1000 pseudoexperiments with the PDF `gen_gauss` via `Toys.make_toys`
      (200 events per toy, starting from mean_GG = 0.4 and sigma_GG = 0.1)
    - log every entry of the returned statistics
    - fill histograms with the fitted values of `mean_GG` and `sigma_GG`
    - draw each histogram and log its text dump
    """

    logger = getLogger('test_toys')

    ## configuration of generation, of fitting and the starting parameter values
    generation = {'nEvents': 200, 'sample': True}
    fitting = {'silent': True, 'refit': 5}
    start_values = {'mean_GG': 0.4, 'sigma_GG': 0.1}

    results, stats = Toys.make_toys(pdf=gen_gauss,
                                    nToys=1000,
                                    data=[mass],
                                    gen_config=generation,
                                    fit_config=fitting,
                                    init_pars=start_values,
                                    silent=True,
                                    progress=True)

    for name in stats:
        logger.info("Toys: %-20s : %s" % (name, stats[name]))

    ## histograms of the fitted values, each paired with its key in `results`
    histos = (
        (ROOT.TH1F('h1', 'mean of Gauss ', 100, 0, 0.80), 'mean_GG'),
        (ROOT.TH1F('h2', 'sigma of Gauss', 100, 0.05, 0.15), 'sigma_GG'),
    )

    for histo, key in histos:
        for value in results[key]:
            histo.Fill(value)

    ## show the histograms one by one, pausing for a second after each
    for histo, _ in histos:
        histo.draw()
        logger.info("%s  :\n%s" % (histo.GetTitle(), histo.dump(30, 10)))
        time.sleep(1)
def test_toys_simfit_1():
    """Toy study for a simultaneous fit of two samples sharing one observable.

    - sample "A": 1000 Gaussian (mean 2.0, sigma 0.50) and 250 uniform values
    - sample "B":  250 Gaussian (mean 3.0, sigma 0.25) and 1000 uniform values
    - the Gaussian of model "B" is tied to that of model "A":
      its mean is `mean_A + 1.0` and its sigma is `sigma_A * 0.5`,
      and both models use the same background component
    - each model is fitted to its own sample, then the combined model is
      fitted (twice) to the combined dataset
    - finally 100 toys are run with the simultaneous model
    """

    ## the only observable
    mass = ROOT.RooRealVar('test_mass', 'Some test mass', 0, 5)

    ## two initially empty datasets over this observable
    varset_a = ROOT.RooArgSet(mass)
    data_a = ROOT.RooDataSet(dsID(), 'Test Data set-1', varset_a)

    varset_b = ROOT.RooArgSet(mass)
    data_b = ROOT.RooDataSet(dsID(), 'Test Data set-2', varset_b)

    def fill(dataset, varset, entries, draw):
        """Draw `entries` values and add to `dataset` those passing `value in mass`."""
        for _ in range(entries):
            value = draw()
            if value in mass:
                mass.setVal(value)
                dataset.add(varset)

    ## sample "A": high statistics, low background
    mean_a = 2.0
    sigma_a = 0.50
    n_sig_a = 1000
    n_bkg_a = 250

    fill(data_a, varset_a, n_sig_a, lambda: random.gauss(mean_a, sigma_a))
    fill(data_a, varset_a, n_bkg_a, lambda: random.uniform(0, 5))

    ## sample "B": low statistics, high background
    n_sig_b = 250
    n_bkg_b = 1000
    gen_mean_b = mean_a + 1.0
    gen_sigma_b = sigma_a * 0.5

    fill(data_b, varset_b, n_sig_b, lambda: random.gauss(gen_mean_b, gen_sigma_b))
    fill(data_b, varset_b, n_bkg_b, lambda: random.uniform(0, 5))

    ## model for sample "A"
    signal_a = Models.Gauss_pdf('G1',
                                xvar=mass,
                                mean=(0.5, 2.5),
                                sigma=(0.1, 1.0))

    model_a = Models.Fit1D(suffix='M1', signal=signal_a, background=-1)
    model_a.S = n_sig_a
    model_a.B = n_bkg_a

    ## model for sample "B": signal shape is expressed via parameters of "A"
    mean_b_var = signal_a.vars_add(signal_a.mean, 1.0)
    sigma_b_var = signal_a.vars_multiply(signal_a.sigma, 0.5)

    signal_b = Models.Gauss_pdf('G2',
                                xvar=mass,
                                mean=mean_b_var,
                                sigma=sigma_b_var)

    model_b = Models.Fit1D(suffix='M2',
                           signal=signal_b,
                           background=model_a.background)
    model_b.S = n_sig_b
    model_b.B = n_bkg_b

    # =========================================================================
    ## separate fits of the two samples
    fit_a, frame_a = model_a.fitTo(data_a, draw=True, nbins=50, silent=True)
    fit_b, frame_b = model_b.fitTo(data_b, draw=True, nbins=50, silent=True)
    # =========================================================================

    ## category that labels the two samples
    sample = ROOT.RooCategory('sample', 'sample', 'A', 'B')

    ## combined dataset
    from ostap.fitting.simfit import combined_data
    observables = ROOT.RooArgSet(mass)
    dataset = combined_data(sample, observables, {'A': data_a, 'B': data_b})

    ## combined PDF
    model_sim = Models.SimFit(sample, {'A': model_a, 'B': model_b}, name='X')

    # =========================================================================
    ## the simultaneous fit is performed twice; the second result is used below
    fit_sim, frame_sim = model_sim.fitTo(dataset, silent=True)
    fit_sim, frame_sim = model_sim.fitTo(dataset, silent=True)

    frame_sim_a = model_sim.draw('A', dataset, nbins=50)
    frame_sim_b = model_sim.draw('B', dataset, nbins=50)

    logger.info('Fit  results are: %s ' % fit_sim.table(prefix="# "))

    ## toys: the number of events is given per sample,
    ## and parameters are initialised from the generation values
    generation = {
        'nEvents': (n_sig_a + n_bkg_a, n_sig_b + n_bkg_b),
        'sample': True
    }
    start_values = {
        'mean_G1': mean_a,
        'BM1': n_bkg_a,
        'BM2': n_bkg_b,
        'sigma_G1': sigma_a,
        'SM1': n_sig_a,
        'SM2': n_sig_b,
        'phi0_Bkg_FitG1_M1': 0
    }

    results, stats = Toys.make_toys(pdf=model_sim,
                                    nToys=100,
                                    data=[mass],
                                    gen_config=generation,
                                    fit_config={'silent': True},
                                    init_pars=start_values,
                                    silent=True,
                                    progress=True)
Esempio n. 4
0
def parallel_toys(
        pdf,
        nToys,  ## total number of toys
        nSplit,  ## split into `nSplit` subjobs
        data,  ## template for dataset/variables
        gen_config,  ## parameters for `pdf.generate`
        fit_config={},  ## parameters for `pdf.fitTo`
        init_pars={},
        more_vars={},
        gen_fun=None,  ## generator function ( pdf , varset  , **config )
        fit_fun=None,  ## fit       function ( pdf , dataset , **config )
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset )
        silent=True,
        progress=False,
        **kwargs):
    """Make `nToys` pseudoexperiments, splitting them into `nSplit` subjobs
    to be executed in parallel

    -   Schematically:
    >>> for toy in range ( nToys )  :
    >>> ...  dataset = gen_fun ( pdf , ...     , **gen_config )
    >>> ...  result  = fit_fun ( pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result , pdf , dataset ) : continue
    >>> .... < collect statistics here >

    For each experiment:

    1. generate dataset using `pdf` with variables specified
    in `data` and configuration specified via `gen_config`;
    for each generation the parameters of `pdf` are reset
    to their initial values and to the values from `init_pars`

    2. fit generated dataset with `pdf` using configuration
    specified via `fit_config`

    - pdf        PDF to be used for generation and fitting
    - nToys      total number of pseudoexperiments to generate (positive integer)
    - nSplit     split total number of pseudoexperiments into `nSplit` subjobs
                 (positive integer)
    - data       `ROOT.RooAbsData`, or a sequence of variables
                 (`ROOT.RooAbsArg` objects or names of `pdf` parameters)
                 to be used for dataset generation
    - gen_config configuration of `pdf.generate`, must contain the key 'nEvents'
    - fit_config configuration of `pdf.fitTo`
    - init_pars  redefine these parameters for each pseudoexperiment
    - more_vars  dictionary of functions to define the additional results
    - gen_fun    generator function
    - fit_fun    fitting   function
    - accept_fun accept    function
    - silent     silent toys?
    - progress   show progress bar?
    - kwargs     forwarded to `WorkManager`

    It returns a dictionary with fit results for the toys and a dictionary of statistics

    >>> pdf = ...
    ... results, stats = parallel_toys (
    ...     pdf        = pdf                  , ## PDF to use
    ...     nToys      = 1000                 , ## total number of toys
    ...     nSplit     = 10                   , ## number of subjobs
    ...     data       = [ 'mass' ]           , ## variables in dataset
    ...     gen_config = { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...     fit_config = { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...     init_pars  = { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation
    ...     )

    Derived parameters can also be retrieved via the `more_vars` argument:
    >>> ratio    = lambda res,pdf : res.ratio('x','y')
    >>> more_vars = { 'Ratio' : ratio }
    >>> r,  s = parallel_toys ( .... , more_vars = more_vars , ... )

    Parallelization is controlled by two arguments (passed via `kwargs`)
    - `ncpus` :  number of local cpus to use, default is `'autodetect'`,
    that means all local processors
    - `ppservers`:  list of servers to be used (for parallel python)

    - If `gen_fun`    is not specified `generate_data` is used
    - If `fit_fun`    is not specified `make_fit`      is used
    - If `accept_fun` is not specified `accept_fit`    is used

    With `nSplit == 1` nothing is parallelized: the call is delegated to
    `ostap.fitting.toys.make_toys` and `kwargs` are not used.

    Raises `AssertionError` for invalid `gen_config`, `nToys` or `nSplit`,
    and `TypeError` for an entry of `data` that is neither a `ROOT.RooAbsArg`
    nor the name of a parameter of `pdf`.
    """
    ## NB: the dictionary defaults are shared between calls;
    ##     this function only reads them and passes them on
    from ostap.core.ostap_types import integer_types

    ## format via a 1-tuple, so that any value of `gen_config` can be shown
    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % (gen_config,)

    ## the messages must use only names available here: there is no job
    ## identifier in this function, and an undefined name in the message
    ## would turn the intended AssertionError into NameError
    assert isinstance ( nToys  , integer_types ) and 0 < nToys  ,\
           'Invalid "nToys" argument %s/%s'  % ( nToys  , type ( nToys  ) )

    assert isinstance ( nSplit , integer_types ) and 0 < nSplit ,\
           'Invalid "nSplit" argument %s/%s' % ( nSplit , type ( nSplit ) )

    import ostap.fitting.toys as Toys

    ## single subjob: no need for parallel machinery
    if 1 == nSplit:
        return Toys.make_toys(pdf=pdf,
                              nToys=nToys,
                              data=data,
                              gen_config=gen_config,
                              fit_config=fit_config,
                              init_pars=init_pars,
                              more_vars=more_vars,
                              gen_fun=gen_fun,
                              fit_fun=fit_fun,
                              accept_fun=accept_fun,
                              silent=silent,
                              progress=progress)

    ## imported for their import-time side effects
    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult

    ## the task gets the names of the variables instead of the variables
    params = pdf.params()
    if isinstance(data, ROOT.RooAbsData):
        toy_data = [v.GetName() for v in data.varset()]
    else:
        toy_data = []
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                toy_data.append(v.GetName())
            elif isinstance(v, string_types) and v in params:
                toy_data.append(v)
            else:
                raise TypeError("Invalid type of variable %s/%s" %
                                (v, type(v)))

    toy_init_pars = Toys.vars_transform(init_pars)

    # ========================================================================
    ## split `nToys` into subjobs: `nSplit` jobs of `nToy` toys each,
    ## plus one more job for the remainder (if any);
    ## never create more jobs than there are toys
    if nToys <= nSplit:
        nToy, nRest, nSplit = 1, 0, nToys
    else:
        nToy, nRest = divmod(nToys, nSplit)

    jobs = nSplit * [nToy]
    if nRest:
        jobs.append(nRest)

    task = ToysTask(pdf=pdf,
                    data=toy_data,
                    gen_config=gen_config,
                    fit_config=fit_config,
                    init_pars=toy_init_pars,
                    more_vars=more_vars,
                    gen_fun=gen_fun,
                    fit_fun=fit_fun,
                    accept_fun=accept_fun,
                    silent=silent,
                    progress=progress)

    wmgr = WorkManager(silent=False, **kwargs)
    wmgr.process(task, jobs)

    results, stats = task.results()
    Toys.print_stats(stats, nToys)

    return results, stats