Example #1
0
def make_toys(
        pdf,
        nToys,
        data,  ## template for dataset/variables 
        gen_config,  ## parameters for <code>pdf.generate</code>   
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        init_pars={},
        more_vars={},
        gen_fun=None,  ## generator function ( pdf , varset  , **config )
        fit_fun=None,  ## fit       function ( pdf , dataset , **config )
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset )
        silent=True,
        progress=True):
    """Make `nToys` pseudoexperiments

    -   Schematically:
    >>> for toy in range ( nToys )  :
    >>> ...  dataset = gen_fun ( pdf , ...     , **gen_config )
    >>> ...  result  = fit_fun ( pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result , pdf , dataset ) : continue
    >>> .... < collect statistics here > 
    
    For each pseudoexperiment:

    1. generate dataset using `pdf` with variables specified
    in `data` and configuration specified via `gen_config`;
    for each generation the parameters of `pdf` are reset
    to their initial values and to the values from `init_pars`
    
    2. fit generated dataset with `pdf` using configuration
    specified via `fit_config`

    - pdf        PDF to be used for generation and fitting
    - nToys      number of pseudoexperiments to generate (positive integer)
    - data       dataset, or sequence of variables/variable names,
                 that defines the variables of the generated dataset
    - gen_config configuration of <code>pdf.generate</code>, must contain `nEvents`
    - fit_config configuration of <code>pdf.fitTo</code>
                 (`silent` is added to it when not given explicitly)
    - init_pars  redefine these parameters for each pseudoexperiment
    - more_vars  dictionary { name : function ( fit-result , pdf ) }
                 to define the additional results
    - gen_fun    generator function
    - fit_fun    fitting   function
    - accept_fun accept    function
    - silent     silent toys?
    - progress   show progress bar? 
    
    It returns a tuple of two dictionaries: ( results , stats )

    - `results` : for each floating fit parameter and each key of `more_vars`
                  the list of values from the accepted toys;
                  the key '#' keeps the lengths of the accepted datasets
    - `stats`   : counters for each entry in `results`, 
                  'pull:<parameter>' counters for the fit parameters,
                  and counters of the fit statuses and covariance matrix qualities
    
    >>> pdf = ...
    ... results, stats = make_toys ( pdf     , ## PDF  to use 
    ...                 1000                 , ## number of toys 
    ...                 [ 'mass' ]           , ## variables in dataset 
    ...                 { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...                 { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...                 { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation 
    ...                )

    Derived parameters can be also retrieved via <code>more_vars</code> argument:
    >>> ratio    = lambda res,pdf : res.ratio('x','y') 
    >>> more_vars = { 'Ratio' : ratio }
    >>> r,  s = make_toys ( .... , more_vars = more_vars , ... ) 

    - If `gen_fun`    is not specified `generate_data` is used 
    - If `fit_fun`    is not specified `make_fit`      is used 
    - If `accept_fun` is not specified `accept_fit`    is used 
    """

    from ostap.core.ostap_types import string_types, integer_types

    assert isinstance ( nToys , integer_types ) and 0 < nToys,\
           'Invalid "nToys" argument %s/%s' % ( nToys , type ( nToys ) )

    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % gen_config

    ## 1. generator function?
    if gen_fun is None:
        if not silent:
            logger.info("make_toys: use default ``generate_data'' function!")
        gen_fun = generate_data
    assert gen_fun and callable(gen_fun), 'Invalid generator function!'

    ## 2. fitting function?
    if fit_fun is None:
        if not silent:
            logger.info("make_toys: use default ``make_fit'' function!")
        fit_fun = make_fit
    assert fit_fun and callable(fit_fun), 'Invalid fit function!'

    ## 3. accept function?
    if accept_fun is None:
        if not silent:
            logger.info("make_toys: use default ``accept_fit'' function!")
        accept_fun = accept_fit
    assert accept_fun and callable(accept_fun), 'Invalid accept function!'

    ## NOTE(review): these imports are not used by name below; presumably
    ## they are needed for their import-time side effects - confirm
    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    params = pdf.params()
    varset = ROOT.RooArgSet()

    ## variables of the dataset: take them from the template dataset,
    ## or collect them from the sequence of variables/names 
    if isinstance(data, ROOT.RooAbsData): varset = data.varset()
    else:
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                varset.add(v)
            elif isinstance(v, string_types) and v in params:
                varset.add(params[v])
            else:
                raise TypeError('Invalid variable %s/%s' % (v, type(v)))

    ## snapshots of the parameters, taken before any toy is fitted 
    fix_pars = vars_transform(params)
    fix_init = vars_transform(init_pars)

    pdf.load_params(params=fix_pars, silent=silent)
    pdf.load_params(params=fix_init, silent=silent)

    ## save all initial parameters (needed for the final statistics)
    params = pdf.params()
    fix_all = vars_transform(params)

    ## work with a copy: the (shared, default) `fit_config` is never modified
    fitcnf = {}
    fitcnf.update(fit_config)
    if 'silent' not in fitcnf: fitcnf['silent'] = silent

    from collections import defaultdict
    results = defaultdict(list)

    from ostap.core.core import SE, VE

    fits = defaultdict(SE)  ## fit statuses
    covs = defaultdict(SE)  ## covariance matrix quality

    ## run pseudoexperiments
    from ostap.utils.progress_bar import progress_bar
    for i in progress_bar(range(nToys), silent=not progress):

        ## 1. reset PDF parameters from the snapshots taken before the loop,
        ##    the same ones that were loaded before the first toy
        ##    NOTE(review): assumes `vars_transform` returns plain values that
        ##    the fit cannot modify - confirm
        pdf.load_params(params=fix_pars, silent=silent)
        pdf.load_params(params=fix_init, silent=silent)

        ## 2. generate dataset!
        dataset = gen_fun(pdf, varset=varset, **gen_config)
        if not silent:
            logger.info('Generated dataset #%d\n%s' % (i, dataset))

        ## 3. fit it!
        r = fit_fun(pdf, dataset, **fitcnf)
        if not silent:
            logger.info('Fit result #%d\n%s' %
                        (i, r.table(title='Fit result #%d' % i, prefix='# ')))

        ## fit status (counted for all toys, accepted or not)
        fits[r.status()] += 1

        ## covariance matrix quality (counted for all toys, accepted or not)
        covs[r.covQual()] += 1

        ## ok ?
        if accept_fun(r, pdf, dataset):

            ## 4. save results
            rpf = r.params(float_only=True)
            for p in rpf:
                results[p].append(rpf[p][0])

            for v in more_vars:
                func = more_vars[v]
                results[v].append(func(r, pdf))

            results['#'].append(len(dataset))

        ## release the dataset and the fit result before the next toy 
        dataset.clear()
        del dataset
        del r

    ## make a final statistics
    stats = defaultdict(SE)

    for par in results:
        pars = results[par]
        ## pulls are defined only for the genuine fit parameters
        ## with known initial value, not for the derived `more_vars`
        a0 = None if par in more_vars else fix_all.get(par, None)
        for v in pars:
            v0 = float(v)
            stats[par] += v0
            if a0 is not None and isinstance(v, VE) and 0 < v.error():
                stats['pull:%s' % par] += (v0 - a0) / v.error()

    for k in fits:
        stats['- Status  %s' % k] = fits[k]
    for k in covs:
        stats['- CovQual %s' % k] = covs[k]

    if progress or not silent: print_stats(stats, nToys)

    return results, stats
Example #2
0
def make_toys2(
        gen_pdf,  ## pdf to generate toys 
        fit_pdf,  ## pdf to fit  
        nToys,  ## number of pseudoexperiments 
        data,  ## template for dataset/variables 
        gen_config,  ## parameters for <code>pdf.generate</code>   
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        gen_pars={},  ## gen-parameters to reset/use 
        fit_pars={},  ## fit-parameters to reset/use
        more_vars={},  ## additional  results to be calculated
        gen_fun=None,  ## generator function ( pdf , varset  , **gen_config ) 
        fit_fun=None,  ## fit       function ( pdf , dataset , **fit_config ) 
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset     )
        silent=True,
        progress=True,
        logger=logger,
        frequency=1000):
    """Make `nToys` pseudoexperiments: generate with `gen_pdf`, fit with `fit_pdf`

    -   Schematically:
    >>> for toy in range ( nToys )  :
    >>> ...  dataset = gen_fun ( gen_pdf , ...     , **gen_config )
    >>> ...  result  = fit_fun ( fit_pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result  , fit_pdf , dataset ) : continue
    >>> .... < collect statistics here > 

    For each pseudoexperiment:

    1. the parameters of `gen_pdf` are reset to their initial values
    and to the values from `gen_pars`, then the dataset is generated
    with the variables specified in `data` and the configuration `gen_config`

    2. the parameters of `fit_pdf` are reset to their initial values
    and to the values from `fit_pars`, then the dataset is fitted
    using the configuration `fit_config`

    - `gen_pdf`     : PDF to be used for generation
    - `fit_pdf`     : PDF to be used for fitting
    - `nToys`       : number of pseudoexperiments (positive integer)
    - `data`        : dataset, or sequence of variables/variable names
                      (names are looked up in the parameters of `gen_pdf`)
    - `gen_config`  : configuration of generation, must contain `nEvents`
    - `fit_config`  : configuration of fit (`silent` is added when not given)
    - `gen_pars`    : redefine these parameters for generation of each pseudoexperiment
    - `fit_pars`    : redefine these parameters for fit of each pseudoexperiment
    - `more_vars`   : dictionary { name : function ( fit-result , fit_pdf ) }
                      of the additional results to be calculated
    - `gen_fun`     : generator function, `generate_data` if not specified
    - `fit_fun`     : fitting   function, `make_fit`      if not specified
    - `accept_fun`  : accept    function, `accept_fit`    if not specified
    - `silent`      : silent toys?
    - `progress`    : show progress bar?
    - `logger`      : use this logger 
    - `frequency`   : dump the intermediate statistics after each `frequency` toys
                      (only if `progress` or not `silent`; never after the very first toy)

    It returns a tuple ( results , stats ):

    - `results` : for each floating fit parameter and each key of `more_vars`
                  the list of values from the accepted toys; the keys '#' and '#sumw'
                  keep `len(dataset)` and `dataset.sumVar('1')` of the accepted toys
    - `stats`   : the output of `make_stats ( results , fits , covs )`

    >>> gen_pdf = ...
    >>> fit_pdf = ...
    >>> results, stats = make_toys2 (
    ...     gen_pdf              , ## PDF to generate 
    ...     fit_pdf              , ## PDF to fit  
    ...     1000                 , ## number of toys 
    ...     [ 'mass' ]           , ## variables in dataset 
    ...     { 'nEvents' : 5000 } , ## configuration of generation
    ...     { 'ncpus'   : 2    } , ## configuration of fit
    ...     gen_pars = { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation 
    ...     )
    """

    from ostap.core.ostap_types import string_types, integer_types

    verbose = not silent
    report = progress or verbose  ## print (intermediate & final) statistics?

    assert isinstance ( nToys , integer_types ) and 0 < nToys,\
           'Invalid "nToys" argument %s/%s' % ( nToys , type ( nToys ) )

    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % gen_config

    ## the default helpers are looked up only when they are really needed

    ## (1) how to generate?
    if gen_fun is None:
        if verbose:
            logger.info("make_toys2: use default ``generate_data'' function!")
        gen_fun = generate_data
    assert gen_fun and callable(gen_fun), 'Invalid generator function!'

    ## (2) how to fit?
    if fit_fun is None:
        if verbose:
            logger.info("make_toys2: use default ``make_fit'' function!")
        fit_fun = make_fit
    assert fit_fun and callable(fit_fun), 'Invalid fit function!'

    ## (3) how to accept?
    if accept_fun is None:
        if verbose:
            logger.info("make_toys2: use default ``accept_fit'' function!")
        accept_fun = accept_fit
    assert accept_fun and callable(accept_fun), 'Invalid accept function!'

    if progress and verbose:
        assert isinstance ( frequency , integer_types ) and 0 < frequency,\
               "make_toys2: invalid ``frequency'' parameter %s" % frequency

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    ## variables of the dataset to be generated
    gen_params = gen_pdf.params()
    varset = ROOT.RooArgSet()

    if isinstance(data, ROOT.RooAbsData):
        varset = data.varset()
    else:
        for item in data:
            if isinstance(item, ROOT.RooAbsArg):
                varset.add(item)
                continue
            if isinstance(item, string_types) and item in gen_params:
                varset.add(gen_params[item])
                continue
            raise TypeError('Invalid variable %s/%s' % (item, type(item)))

    ## what to load into `gen_pdf` before each generation
    gen_initial = vars_transform(gen_params)
    gen_override = vars_transform(gen_pars)

    ## what to load into `fit_pdf` before each fit
    fit_params = fit_pdf.params()
    fit_initial = vars_transform(fit_params)
    fit_override = vars_transform(fit_pars)

    ## private copy of the fit configuration, `silent` is propagated by default
    fitcnf = {}
    fitcnf.update(fit_config)
    fitcnf.setdefault('silent', silent)

    from collections import defaultdict
    from ostap.core.core import SE

    results = defaultdict(list)
    fits = defaultdict(SE)  ## counters of fit statuses
    covs = defaultdict(SE)  ## counters of covariance matrix qualities

    ## the main loop over pseudoexperiments
    from ostap.utils.progress_bar import progress_bar
    for toy in progress_bar(range(nToys), silent=not progress):

        ## (a) bring the generator PDF into the initial state
        gen_pdf.load_params(params=gen_initial, silent=silent)
        gen_pdf.load_params(params=gen_override, silent=silent)

        ## (b) generate
        dataset = gen_fun(gen_pdf, varset=varset, **gen_config)
        if verbose:
            logger.info('Generated dataset #%d\n%s' % (toy, dataset))

        ## (c) bring the fit PDF into the initial state
        fit_pdf.load_params(params=fit_initial, silent=silent)
        fit_pdf.load_params(params=fit_override, silent=silent)

        ## (d) fit
        result = fit_fun(fit_pdf, dataset, **fitcnf)

        ## (e) all fits are counted, accepted or not
        fits[result.status()] += 1
        covs[result.covQual()] += 1

        ## (f) only accepted fits contribute to the results
        if accept_fun(result, fit_pdf, dataset):

            fitted = result.params(float_only=True)
            for name in fitted:
                results[name].append(fitted[name][0])

            for name in more_vars:
                function = more_vars[name]
                results[name].append(function(result, fit_pdf))

            results['#'].append(len(dataset))
            results['#sumw'].append(dataset.sumVar('1'))

        ## (g) release the dataset
        dataset.clear()
        del dataset

        ## (h) periodic dump of the statistics collected so far
        done = toy + 1
        if report and 0 < frequency and 1 <= toy and 0 == done % frequency:
            stats = make_stats(results, fits, covs)
            print_stats(stats, done, logger=logger)

    ## the final statistics
    stats = make_stats(results, fits, covs)

    if report:
        print_stats(stats, nToys, logger=logger)

    return results, stats
Example #3
0
def make_toys2(
        gen_pdf,  ## pdf to generate toys 
        fit_pdf,  ## pdf to fit  
        nToys,  ## number of pseudoexperiments 
        data,  ## template for dataset/variables 
        gen_config,  ## parameters for <code>pdf.generate</code>   
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        gen_pars={},  ## gen-parameters to reset/use 
        fit_pars={},  ## fit-parameters to reset/use
        more_vars={},  ## additional  results to be calculated  
        silent=True,
        progress=True):
    """Make `nToys` pseudoexperiments: generate with `gen_pdf`, fit with `fit_pdf`
    
    For each experiment:

    1. generate dataset using `gen_pdf` with variables specified
    in `data` and configuration specified via `gen_config`;
    for each generation the parameters of `gen_pdf` are reset
    to their initial values and to the values from `gen_pars`
    
    2. fit generated dataset with `fit_pdf` using configuration
    specified via `fit_config`; before each fit the parameters of `fit_pdf`
    are reset to their initial values and to the values from `fit_pars`

    3. results are collected only for the fits with zero status 

    - gen_pdf    PDF to be used for generation 
    - fit_pdf    PDF to be used for fitting
    - nToys      number of pseudoexperiments to generate (positive integer)
    - data       dataset, or sequence of variables/variable names
                 (names are looked up in the parameters of `gen_pdf`)
    - gen_config configuration of <code>pdf.generate</code>, must contain `nEvents`
    - fit_config configuration of <code>pdf.fitTo</code>
                 (`silent` is added to it when not given explicitly)
    - gen_pars   redefine these parameters for generation of each pseudoexperiment
    - fit_pars   redefine these parameters for fit of each pseudoexperiment
    - more_vars  dictionary { name : function ( fit-result , fit_pdf ) }
                 of the additional results to be calculated
    - silent     silent toys?
    - progress   show progress bar? 
    
    It returns a tuple ( results , stats ):

    - `results` : for each floating fit parameter and each key of `more_vars`
                  the list of values from the valid fits
    - `stats`   : a counter for each entry in `results`
    
    >>> gen_pdf = ...
    >>> fit_pdf = ...
    >>> results, stats = make_toys2 (
    ...     gen_pdf              , ## PDF to generate 
    ...     fit_pdf              , ## PDF to fit  
    ...     1000                 , ## number of toys 
    ...     [ 'mass' ]           , ## variables in dataset 
    ...     { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...     { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...     { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation 
    ...     )
    """

    from ostap.core.ostap_types import string_types, integer_types

    assert isinstance ( nToys , integer_types ) and 0 < nToys,\
           'Invalid "nToys" argument %s/%s' % ( nToys , type ( nToys ) )

    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % gen_config

    ## NOTE(review): these imports are not used by name below; presumably
    ## they are needed for their import-time side effects - confirm
    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    gparams = gen_pdf.params()
    varset = ROOT.RooArgSet()

    ## variables of the dataset: take them from the template dataset,
    ## or collect them from the sequence of variables/names 
    if isinstance(data, ROOT.RooAbsData): varset = data.varset()
    else:
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                varset.add(v)
            elif isinstance(v, string_types) and v in gparams:
                varset.add(gparams[v])
            else:
                raise TypeError('Invalid variable %s/%s' % (v, type(v)))

    ## parameters for generation

    fix_gen_init = vars_transform(gparams)
    fix_gen_pars = vars_transform(gen_pars)

    ## parameters for fitting

    fparams = fit_pdf.params()
    fix_fit_init = vars_transform(fparams)
    fix_fit_pars = vars_transform(fit_pars)

    ## work with a copy: the (shared, default) `fit_config` is never modified
    fitcnf = {}
    fitcnf.update(fit_config)
    if 'silent' not in fitcnf: fitcnf['silent'] = silent

    from collections import defaultdict
    results = defaultdict(list)

    ## run pseudoexperiments
    from ostap.utils.progress_bar import progress_bar
    for i in progress_bar(range(nToys), silent=not progress):

        ## 1. reset PDF parameters
        gen_pdf.load_params(None, fix_gen_init, silent=silent)
        gen_pdf.load_params(None, fix_gen_pars, silent=silent)

        ## 2. generate dataset!
        dataset = gen_pdf.generate(varset=varset, **gen_config)
        if not silent:
            logger.info('Generated dataset #%d\n%s' % (i, dataset))

        ## 3. reset parameters of fit_pdf
        fit_pdf.load_params(None, fix_fit_init, silent=silent)
        fit_pdf.load_params(None, fix_fit_pars, silent=silent)

        ## 4. fit it!
        r, _ = fit_pdf.fitTo(dataset, **fitcnf)
        if not silent:
            logger.info('Fit result #%d\n%s' %
                        (i, r.table(title='Fit result #%d' % i, prefix='# ')))

        ## 5. save results, invalid fits (non-zero status) are skipped
        if not r.status():

            rpf = r.params(float_only=True)
            ## do not reuse `i` here: it is the index of the toy
            for name in rpf:
                results[name].append(rpf[name][0])

            for v in more_vars:
                func = more_vars[v]
                results[v].append(func(r, fit_pdf))

        ## 6. release the dataset for every toy, also for the invalid fits
        dataset.clear()
        del dataset

    ## make a final statistics
    from ostap.core.core import SE
    stats = defaultdict(SE)

    for par in results:
        pars = results[par]
        for v in pars:
            v0 = float(v)
            stats[par] += v0

    if progress or not silent: print_stats(stats, nToys)

    return results, stats