Example #1
def test_modify_initial_tree(NEXP=10):
    """Add pseudoexepriments into TTree/TChain
    """

    files = prepare_data(1, 100000)

    logger.info('Add %s pseudoexperiments into TTree/TChain' % NEXP)

    logger.info('#files:    %s' % len(files))
    data = Data('S', files)
    logger.info('Initial Tree/Chain:\n%s' % data.chain.table(prefix='# '))

    ## pseudo experiments
    for e in progress_bar(range(NEXP)):
        h2_new = h2.sample()
        func = Ostap.Functions.FuncTH2(h2_new, 'pt', 'eta')
        data.chain.add_new_branch('w%d' % e, func)

    data = Data('S', files)
    logger.info('Tree/Chain after:\n%s' % data.chain.table(prefix='# '))

    counter = SE()
    for e in range(NEXP):
        weight = 'w%d' % e
        accepted = data.chain.sumVar('1', weight * cut)
        rejected = data.chain.sumVar('1', weight * ~cut)
        efficiency = 1 / (1 + rejected / accepted)
        logger.info("Experiment %3d, accepted/rejected %s/%s , eff = %s " %
                    (e, accepted, rejected, efficiency))
        counter += efficiency
    logger.info('Statistics of pseudoexperiments %s' % counter)
    logger.info('Mean/rms: %s[%%]/%.4f[%%]' %
                (counter.mean() * 100, counter.rms() * 100))
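For reference, the efficiency computed above is just the weighted acceptance fraction; a tiny standalone sketch with hypothetical numbers (plain Python, no Ostap objects) shows that 1/(1 + rejected/accepted) equals accepted/(accepted + rejected):

accepted = 123.4  # hypothetical sum of weights passing the cut
rejected = 56.7   # hypothetical sum of weights failing the cut

eff1 = 1.0 / (1.0 + rejected / accepted)
eff2 = accepted / (accepted + rejected)

assert abs(eff1 - eff2) < 1e-12
print('efficiency = %.4f' % eff1)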
Example #2
def test_ppft_callable():
    """Test parallel python with callable  
    """
    logger = getLogger("ostap.test_ppft_callable")
    logger.info('Test job submission with %s' % ppft)

    if not ppft:
        logger.error("ppft is not available")
        return

    if DILL_PY3_issue:
        logger.warning("test is disabled for Python %s (DILL/ROOT/PY3 issue)")
        return

    job_server = ppft.Server()

    jobs = [(i, job_server.submit(mh.__call__, (i, n)))
            for (i, n) in enumerate(inputs)]

    result = None
    for input, job in progress_bar(jobs):
        histo = job()
        if not result: result = histo
        else:
            result.Add(histo)
            del histo

    logger.info("Histogram is %s" % result.dump(80, 20))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    with wait(1), use_canvas('test_ppft_callable'):
        result.draw()

    return result
Example #3
    def iexecute(self, job, jobs_args, progress=False):
        """Process the bare `executor` function
        >>> mgr  = WorkManager  ( .... )
        >>> job  = ...
        >>> args = ...
        >>> for result in mgr.iexecute ( job , args ) :
        ...
        ...
        It is a ``minimal'' interface
        - no statistics
        - no summary print
        - no merging of results  
        """

        with pool_context(self.pool) as pool:

            ## create and submit jobs
            jobs = pool.uimap(job, jobs_args)

            njobs = len(jobs_args) if isinstance(jobs_args, Sized) else None
            silent = self.silent or not progress

            ## retrieve (asynchronous) results from the jobs
            for result in progress_bar(jobs,
                                       max_value=njobs,
                                       description="# Jobs execution",
                                       silent=silent):
                yield result
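The same minimal contract (submit all jobs, yield results as they complete, no merging and no statistics) can be sketched with the standard library alone. This is only an illustration of the pattern, assuming a picklable job function, not the actual WorkManager/pool machinery:

from concurrent.futures import ProcessPoolExecutor, as_completed

def iexecute_sketch(job, jobs_args, max_workers=None):
    """Yield results of `job` over `jobs_args` as they complete (unordered)."""
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(job, args) for args in jobs_args]
        for future in as_completed(futures):
            yield future.result()

def _square(x):  # toy payload; must live at module level to be picklable
    return x * x

if __name__ == '__main__':
    for result in iexecute_sketch(_square, range(8)):
        print(result)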
Example #4
def test_pp_function():
    """Test parallel python with plain function
    """
    logger = getLogger("ostap.test_pp_function")
    logger.info('Test job submission with %s' % pp)

    from ostap.core.known_issues import DILL_ROOT_issue
    if DILL_ROOT_issue:
        logger.warning("test is disabled for Python %s (dill/ROOT issue)")
        return

    job_server = pp.Server()

    jobs = [(i, job_server.submit(make_histo, (i, n)))
            for (i, n) in enumerate(inputs)]

    result = None
    for input, job in progress_bar(uimap(jobs), max_value=len(jobs)):
        histo = job()
        if not result: result = histo
        else:
            result.Add(histo)
            del histo

    logger.info("Histogram is %s" % result.dump(80, 10))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    job_server.print_stats()

    with wait(1), use_canvas('test_pp_function'):
        result.draw()

    return result
Example #5
def test_multiprocessing_function():
    """Test parallel processing with multiprocessing
    """

    logger = getLogger("ostap.test_multiprocessing_function")
    logger.info('Test job submission with module %s' % multiprocessing)

    ncpus = multiprocessing.cpu_count()

    from multiprocessing import Pool

    pool = Pool(ncpus)

    jobs = pool.imap_unordered(make_histos, zip(count(), inputs))

    result = None
    for h in progress_bar(jobs, max_value=len(inputs)):
        if not result: result = h
        else: result.Add(h)

    pool.close()
    pool.join()

    logger.info("Histogram is %s" % result.dump(80, 20))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    with wait(5), use_canvas('test_multiprocessing_function'):
        result.draw()

    return result
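The merge pattern used above (consume `imap_unordered` lazily and accumulate each partial result as it arrives) does not depend on ROOT; a self-contained sketch with plain integers in place of histograms:

import multiprocessing

def _work(n):  # toy job standing in for make_histos
    return n * n

def run_sketch(inputs):
    result = 0
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        for partial in pool.imap_unordered(_work, inputs):
            result += partial  # "merge" each partial result as it arrives
    return result

if __name__ == '__main__':
    print(run_sketch(range(10)))  # sum of squares 0..9 -> 285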
Example #6
def test_ppft_callable():
    """Test parallel python with callable  
    """
    logger = getLogger("ostap.test_ppft_callable")
    logger.info('Test job submission with %s' % ppft)

    if not ppft:
        logger.error("ppft is not available")
        return

    logger.warning("test is disabled for UNKNOWN REASON")
    return

    job_server = ppft.Server()

    jobs = [(i, job_server.submit(mh.__call__, (i, n)))
            for (i, n) in enumerate(inputs)]

    result = None
    for input, job in progress_bar(jobs):
        histo = job()
        if not result: result = histo
        else:
            result.Add(histo)
            del histo

    logger.info("Histogram is %s" % result.dump(80, 20))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    result.Draw()
    time.sleep(2)

    return result
Example #7
def test_pp_callable():
    """Test parallel python with callable  
    """
    logger = getLogger("ostap.test_pp_callable")
    logger.info('Test job submission with %s' % pp)

    logger.warning("test is disabled for UNKNOWN REASON")
    return

    job_server = pp.Server()

    jobs = [(i, job_server.submit(mh.__call__, (i, n)))
            for (i, n) in enumerate(inputs)]

    result = None
    for input, job in progress_bar(uimap(jobs), max_value=len(jobs)):
        histo = job()
        if not result: result = histo
        else:
            result.Add(histo)
            del histo

    logger.info("Histogram is %s" % result.dump(80, 10))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    with wait(1), use_canvas('test_pp_callable'):
        result.draw()

    return result
Example #8
def test_pickle():
    logger = getLogger('test_pickle')
    logger.info('Check pickling/unpickling')

    bad = False
    import pickle
    rows = [('#', 'before', 'after', 'mean', 'rms')]
    for i, f in enumerate(progress_bar(functions), start=1):

        n, ff = f
        fs = pickle.loads(pickle.dumps(ff))
        s = SE()
        for j in range(1000):
            x = random.uniform(ff.xmin(), ff.xmax())
            s += abs(fs(x) - ff(x))

        mean = '%-+.6g' % s.mean()
        rms = '%-+.6g' % s.rms()

        if 1.e-7 < s.mean():
            mean = attention(mean)
            bad = True
        if 1.e-7 < s.rms():
            rms = attention(rms)
            bad = True

        row = '%d' % i, ff.__class__.__name__, fs.__class__.__name__, mean, rms
        ## row = '%d' % i , '%s' % ff  , '%s' % fs , '%-+.5g' % s.mean() , '%-+.5g' % s.rms()

        rows.append(row)

    import ostap.logger.table as T
    title = "Compare before/after serialisation"
    table = T.table(rows, title=title, prefix='# ', alignment='rllll')
    if bad: logger.warning('%s\n%s' % (title, table))
    else: logger.info('%s\n%s' % (title, table))
Example #9
def test_derivative_4 ():

    logger = getLogger ( 'test_derivative_4' )


    functions = (
        ( lambda x : cos(10.*x)   , lambda x : -10*sin(10.*x)                  ) , 
        ( lambda x : x**3         , lambda x : 3.0*x*x                         ) , 
        ( lambda x : exp(x)       , lambda x : exp(x)                          ) ,
        ( lambda x : x**8         , lambda x : 8.0*x**7                        ) ,
        ( lambda x : tanh(2.*x)   , lambda x : 2*(1.-tanh(2.*x)**2)            ) ,
        ( lambda x : 1.11*x       , lambda x : 1.11                            ) , 
        ( lambda x : 1.11111      , lambda x : 0.0                             ) , 
        ( lambda x : x**10        , lambda x : 10.*x**9                        ) , 
        )
    
    from ostap.core.core import SE
    counters = {} 

    from ostap.utils.progress_bar import progress_bar
    
    IMAX  = 8 
    table =  [ ['Function'] + [ 'I=%d' % i for i in range ( IMAX ) ] ] 
    for i , o in enumerate ( progress_bar ( functions ) ) :
        
        fun = o [ 0 ] ## function 
        der = o [ 1 ] ## derivative 
        
        row = [ '%2d' % (i+1) ]
        for I in range ( IMAX ) :
            
            cnt1 = SE ()            
            cnt2 = SE ()
            
            dd   = Derivative ( fun , step = 0.001 , calc = I , with_error = True )

            for j in range ( 1000 ) :
                
                x = random.uniform ( 0.05 , 1.5 )
                res  = dd ( x ) 
                dif  = float ( res ) - der ( x ) 
                cnt1 += dif 
                if res.cov2() > 0 : cnt2 += dif/res.error()  
                
            mmax1 = abs ( cnt1.max ()  *10**12 ) 
            if 2 < cnt2.nEntries() : 
                mmax2 = cnt2.max()
                row.append ( '%7.3f / %-5.2fs' % ( mmax1, mmax2 ) )
            else :
                mmax2 = 0
                row.append ( '%7.3f / %-5.2fs' % ( mmax1, mmax2 ) )
                
        table.append ( row )

    table = T.table ( table , prefix = '# ' , alignment=9*'c' )
    logger.info ('Numerical differentiation: Max difference [10^12]\n%s' % table ) 
Example #10
def prepare_data(nfiles=50, nentries=500):

    from ostap.utils.cleanup import CleanUp
    files = [
        CleanUp.tempfile(prefix='ostap-test-trees-addbranch-%d-' % i,
                         suffix='.root') for i in range(nfiles)
    ]

    for f in progress_bar(files):
        create_tree(f, nentries)
    return files
Example #11
def prepare_data(nfiles=50, nentries=500):
    """ prepare data for tests
    """
    from ostap.utils.cleanup import CleanUp
    files = [
        CleanUp.tempfile(prefix='ostap-test-fitting-fill-%d-' % i,
                         suffix='.root') for i in range(nfiles)
    ]

    for f in progress_bar(files):
        create_tree(f, nentries)
    return files
Example #12
def prepare_data(nfiles=10, nentries=100):
    """Prepare data for the test
    """

    files = []
    for i in progress_bar(range(nfiles)):

        from ostap.utils.cleanup import CleanUp
        tmpfile = CleanUp.tempfile(prefix='ostap-test-selectors-',
                                   suffix='.root')
        files.append(create_tree(tmpfile, nentries))

    files.sort()
    return Data('S', files)
Example #13
def test_pathos_pp_callable () :
    """Test parallel processnig with pathos: ParallelPool  
    """
    logger = getLogger("ostap.test_pathos_pp_callable")         
    if not pathos :
        logger.error ( "pathos is not available" )
        return
    
    logger.info ('Test job submission with %s' %  pathos ) 
    
    if DILL_PY3_issue : 
        logger.warning ("test is disabled (DILL/ROOT/PY3 issue)" )
        return

    ## logger.warning ("test is disabled for UNKNOWN REASON")
    ## return

    from pathos.helpers import cpu_count
    ncpus = cpu_count  ()
    
    from pathos.pools import ParallelPool as Pool 

    pool = Pool ( ncpus )   
    logger.info ( "Pool is %s" %  ( type ( pool ).__name__ ) )

    pool.restart ( True ) 


    mh   = MakeHisto() 
    jobs = pool.uimap ( mh.process ,  [  ( i , n )  for  ( i , n ) in enumerate ( inputs ) ] )
    
    result = None 
    for h in progress_bar ( jobs , max_value = len ( inputs ) ) :
        if not result  : result = h
        else           : result.Add ( h )

    pool.close ()
    pool.join  ()
    pool.clear ()
    
    logger.info ( "Histogram is %s" % result.dump ( 80 , 10 )  )
    logger.info ( "Entries  %s/%s" % ( result.GetEntries() , sum ( inputs ) ) ) 
    
    with wait ( 1 ) , use_canvas ( 'test_pathos_pp_callable' ) : 
        result.draw (   ) 

    return result 
Example #14
def test_multiprocess_function () :
    """Test parallel processnig with multiprocess
    """
    logger =    getLogger ("ostap.test_multiprocess_function")
    logger.info ('Test job submission with %s' %  multiprocess ) 
    
    if not dill :
        logger.error ( "dill is not available" )
        return
        
    if not multiprocess :
        logger.error ( "multiprocess is not available" )
        return 
        
    from ostap.core.known_issues import DILL_ROOT_issue
    if DILL_ROOT_issue : 
        logger.warning ("test is disabled for Python %s (dill/ROOT issue)" )
        return
    
    ncpus = multiprocess.cpu_count() 
    
    from multiprocess import Pool
    
    pool = Pool  ( ncpus ) 
    
    jobs = pool.imap_unordered ( make_histos , zip ( count() ,  inputs ) )
    
    result = None 
    for h in progress_bar ( jobs , max_value = len ( inputs ) ) :
        if not result  : result = h
        else           : result.Add ( h )

    pool.close ()
    pool.join  ()
    
    logger.info ( "Histogram is %s" % result.dump ( 80 , 20 ) )
    logger.info ( "Entries  %s/%s" % ( result.GetEntries() , sum ( inputs ) ) ) 
    
    result.Draw (   ) 
    time.sleep  ( 2 )

    return result 
Example #15
def test_multiprocess_callable  () :
    """Test parallel processnig with multiprocess
    """
    logger =    getLogger ("ostap.test_multiprocess_callable")
    logger.info ('Test job submission with %s' %  multiprocess ) 
    
    if not dill :
        logger.error ( "dill is not available" )
        return
        
    if not multiprocess :
        logger.error ( "multiprocess is not available" )
        return 
        
    if DILL_PY3_issue :
        logger.warning ( "test is disabled (DILL/ROOT/PY3 issue)" )
        return
    
    ncpus = multiprocess.cpu_count() 
    
    from multiprocess import Pool
    
    pool = Pool  ( ncpus ) 
    
    jobs = pool.imap_unordered ( mh , zip ( count() ,  inputs ) )
    
    result = None 
    for h in progress_bar ( jobs , max_value = len ( inputs ) ) :
        if not result  : result = h
        else           : result.Add ( h )

    pool.close ()
    pool.join  ()
    
    logger.info ( "Histogram is %s" % result.dump ( 80 , 20 ) )
    logger.info ( "Entries  %s/%s" % ( result.GetEntries() , sum ( inputs ) ) ) 
    
    with wait ( 1 ) , use_canvas ( 'test_multiprocess_callable' ) : 
        result.draw (   ) 

    return result 
Example #16
    def add_files ( self , files , max_files = -1 ) :
        """ Add files/patterns to data collector
        """
        
        if isinstance ( files  , str ) : files  = [ files  ]

        ## eliminate duplicates and sort 
        files = tuple ( sorted ( set ( files ) ) )
        
        nfiles    = len ( files )
        max_files = max_files if 0 <= max_files <= nfiles else nfiles 
        
        from ostap.utils.progress_bar import progress_bar
        for f in progress_bar ( files , silent = self.silent ) :
            
            if max_files <= len ( self.files ) :
                logger.debug ('Max-files limit is reached %s ' % max_files )
                break

            ## treat the file 
            self.treatFile ( f )
Example #17
def test_add_to_dataset(NEXP=10):
    """Add pseudoexepriments into RooDataSet
    """

    logger.info('Add %s pseudoexperiments into RooDataSet' % NEXP)

    files = prepare_data(1, 100000)

    logger.info('#files:    %s' % len(files))
    data = Data('S', files)
    logger.info('Initial Tree/Chain:\n%s' % data.chain.table(prefix='# '))

    import ostap.fitting.pyselectors
    dataset, _ = data.chain.fill_dataset(['mass', 'pt', 'eta'])

    logger.info('Initial dataset:\n%s' % dataset.table(prefix='# '))

    ## pseudo experiments
    for e in progress_bar(range(NEXP)):
        h2_new = h2.sample()
        func = Ostap.Functions.FuncRooTH2(h2_new, 'pt', 'eta')
        dataset.add_new_var('w%d' % e, func)

    logger.info('Dataset after:\n%s' % dataset.table(prefix='# '))

    counter = SE()
    for e in range(NEXP):
        weight = 'w%d' % e
        accepted = dataset.sumVar('1', weight * cut)
        rejected = dataset.sumVar('1', weight * ~cut)
        efficiency = 1 / (1 + rejected / accepted)
        logger.info("Experiment %3d, accepted/rejected %s/%s , eff = %s " %
                    (e, accepted, rejected, efficiency))

        counter += efficiency
    logger.info('Statistics of pseudoexperiments %s' % counter)
    logger.info('Mean/rms: %s[%%]/%.4f[%%]' %
                (counter.mean() * 100, counter.rms() * 100))
Example #18
def test_pathos_mp_function () :
    """Test parallel processnig with pathos: ProcessPool
    """
    logger = getLogger("ostap.test_pathos_mp_function")
    if not pathos :
        logger.error ( "pathos is not available" )
        return 
    
    logger.info ('Test job submission with %s' %  pathos ) 
    
    if DILL_PY3_issue : 
        logger.warning ("test is disabled (DILL/ROOT/PY3 issue)" )
        return
    
    from pathos.helpers import cpu_count
    ncpus = cpu_count  ()
    
    from pathos.pools import ProcessPool as Pool

    pool = Pool ( ncpus )
    logger.info ( "Pool is %s" % ( type ( pool ).__name__ ) )

    with pool_context   ( pool ) : 
        
        jobs = pool.uimap ( make_histo ,  zip ( count() , inputs ) )
        
        result = None 
        for h in progress_bar ( jobs , max_value = len ( inputs ) ) :
            if not result  : result = h
            else           : result.Add ( h )
                
    logger.info ( "Histogram is %s" % result.dump ( 80 , 10 )  )
    logger.info ( "Entries  %s/%s" % ( result.GetEntries() , sum ( inputs ) ) ) 
    
    with wait ( 1 ) , use_canvas ( 'test_pathos_mp_function' ) : 
        result.draw (   ) 

    return result 
Example #19
def test_ppft_method():
    """Test parallel python with object method  
    """
    logger = getLogger("ostap.test_ppft_method")
    logger.info('Test job submission with %s' % ppft)

    if not ppft:
        logger.error("ppft is not available")
        return

    from ostap.core.known_issues import DILL_ROOT_issue
    if DILL_ROOT_issue:
        logger.warning("test is disabled for Python %s (dill/ROOT issue)")
        return

    job_server = ppft.Server()

    jobs = [(i, job_server.submit(mh.process, (i, n)))
            for (i, n) in enumerate(inputs)]

    result = None
    for input, job in progress_bar(uimap(jobs), max_value=len(jobs)):
        histo = job()
        if not result: result = histo
        else:
            result.Add(histo)
            del histo

    logger.info("Histogram is %s" % result.dump(80, 20))
    logger.info("Entries  %s/%s" % (result.GetEntries(), sum(inputs)))

    job_server.print_stats()

    result.Draw()
    time.sleep(2)

    return result
Example #20
def make_toys2(
        gen_pdf,  ## pdf to generate toys 
        fit_pdf,  ## pdf to fit  
        nToys,  ## number of pseudoexperiments 
        data,  ## template for dataset/variables 
        gen_config,  ## parameters for <code>pdf.generate</code>   
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        gen_pars={},  ## gen-parameters to reset/use 
        fit_pars={},  ## fit-parameters to reset/use
        more_vars={},  ## additional  results to be calculated
        gen_fun=None,  ## generator function ( pdf , varset  , **gen_config ) 
        fit_fun=None,  ## fit       function ( pdf , dataset , **fit_config ) 
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset     )
        silent=True,
        progress=True,
        logger=logger,
        frequency=1000):
    """Make `ntoys` pseudoexperiments
    
    -   Schematically:
    >>> for toy in range ( nToys )  :
    >>> ...  dataset = gen_fun ( gen_pdf , ...     , **gen_config )
    >>> ...  result  = fit_fun ( fit_pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result  , fit_pdf , dataset ) : continue
    >>> .... < collect statistics here > 
    
    For each experiment:

    1. generate dataset using `pdf` with variables specified
    in `data` and configuration specified via `gen_config`
    for each generation the parameters of `pdf` are reset
    to their initial values and to the values from `gen_pars`
    
    2. fit generated dataset  with `pdf` using configuration
    specified via  `fit_config`

    - `pdf`         : PDF to be used for generation and fitting
    - `nToys`       : number    of pseudoexperiments to generate
    - `data`        : variable list of variables to be used for dataset generation
    - `gen_config`  : configuration of <code>pdf.generate</code>
    - `fit_config`  : configuration of <code>pdf.fitTo</code>
    - `gen_pars`    : redefine these parameters for generation of each pseudoexperiment
    - `fit_pars`    : redefine these parameters for fit of each pseudoexperiment
    - `silent`      : silent toys?
    - `progress`    : show progress bar?
    - `logger`      : use this logger 
    - `frequency`   : how often to dump the intermediate results ? 
    
    It returns a dictionary with fit results for the toys and a dictionary of statistics
    >>> pdf = ...
    ... results, stats = make_toys ( pdf     , ## PDF  to use 
    ...                 1000                 , ## number of toys 
    ...                 [ 'mass' ]           , ## variables in dataset
    ...                 { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...                 { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...                 { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation 
    ...                )
    """

    from ostap.core.ostap_types import string_types, integer_types

    assert isinstance ( nToys , integer_types ) and 0 < nToys,\
           'Invalid "nToys" argument %s/%s' % ( nToys , type ( nToys ) )

    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % gen_config

    ## 1. generator function?
    if gen_fun is None:
        if not silent:
            logger.info("make_toys2: use default ``generate_data'' function!")
        gen_fun = generate_data
    assert gen_fun and callable(gen_fun), 'Invalid generator function!'

    ## 2. fitting function?
    if fit_fun is None:
        if not silent:
            logger.info("make_toys2: use default ``make_fit'' function!")
        fit_fun = make_fit
    assert fit_fun and callable(fit_fun), 'Invalid fit function!'

    ## 3. accept function?
    if accept_fun is None:
        if not silent:
            logger.info("make_toys2: use default ``accept_fit'' function!")
        accept_fun = accept_fit
    assert accept_fun and callable(accept_fun), 'Invalid accept function!'

    if progress and not silent:
        assert isinstance ( frequency , integer_types ) and 0 < frequency,\
               "make_toys2: invalid ``frequency'' parameter %s" % frequency

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    gparams = gen_pdf.params()
    varset = ROOT.RooArgSet()

    if isinstance(data, ROOT.RooAbsData): varset = data.varset()
    else:
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                varset.add(v)
            elif isinstance(v, string_types) and v in gparams:
                varset.add(gparams[v])
            else:
                raise TypeError('Invalid variable %s/%s' % (v, type(v)))

    ## parameters for generation

    fix_gen_init = vars_transform(gparams)
    fix_gen_pars = vars_transform(gen_pars)

    ## parameters for fitting

    fparams = fit_pdf.params()
    fix_fit_init = vars_transform(fparams)
    fix_fit_pars = vars_transform(fit_pars)

    fitcnf = {}
    fitcnf.update(fit_config)
    if not 'silent' in fitcnf: fitcnf['silent'] = silent

    from collections import defaultdict
    results = defaultdict(list)

    from ostap.core.core import SE

    fits = defaultdict(SE)  ## fit statuses
    covs = defaultdict(SE)  ## covariance matrix quality

    ## run pseudoexperiments
    from ostap.utils.progress_bar import progress_bar
    for i in progress_bar(range(nToys), silent=not progress):

        ## 1. reset PDF parameters
        gen_pdf.load_params(params=fix_gen_init, silent=silent)
        gen_pdf.load_params(params=fix_gen_pars, silent=silent)

        ## 2. generate dataset!
        dataset = gen_fun(gen_pdf, varset=varset, **gen_config)
        if not silent: logger.info('Generated dataset #%d\n%s' % (i, dataset))

        ## 3. reset parameters of fit_pdf
        fit_pdf.load_params(params=fix_fit_init, silent=silent)
        fit_pdf.load_params(params=fix_fit_pars, silent=silent)

        ## 4. fit it!
        r = fit_fun(fit_pdf, dataset, **fitcnf)

        ## fit status
        fits[r.status()] += 1

        ## covariance matrix quality
        covs[r.covQual()] += 1

        ## ok ?
        if accept_fun(r, fit_pdf, dataset):

            ## 5. save results
            rpf = r.params(float_only=True)
            for j in rpf:
                results[j].append(rpf[j][0])

            for v in more_vars:
                func = more_vars[v]
                results[v].append(func(r, fit_pdf))

            results['#'].append(len(dataset))
            results['#sumw'].append(dataset.sumVar('1'))

        dataset.clear()
        del dataset

        if progress or not silent:
            if 0 < frequency and 1 <= i and 0 == (i + 1) % frequency:
                stats = make_stats(results, fits, covs)
                print_stats(stats, i + 1, logger=logger)

    ## make a final statistics
    stats = make_stats(results, fits, covs)

    if progress or not silent:
        print_stats(stats, nToys, logger=logger)

    return results, stats
Example #21
def make_bootstrap(
        pdf,
        data,
        size=100,  ## number of samples
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        fit_pars={},  ## fit-parameters to reset/use
        more_vars={},  ## additional  results to be calculated
        fit_fun=None,  ## fit       function ( pdf , dataset , **fit_config ) 
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset     )
        silent=True,  ## silent processing?
        progress=True,  ## show progress bar?
        logger=logger,  ## use this logger 
        frequency=100):
    """Run Bootstrap analysis, useful for evaluaton of fit biased and uncertainty estimates 
    In total `size` datasets are sampled (with replacement) from the original dataste
    `data` and each sampled dataset is fit
    >>> dataset = ...
    >>> model   = ...
    >>> r , f = model.fitTo ( dataset , .... )                         ## fit the whole dataset   
    >>> results, stats = make_bootstrap ( model , data , size = 1000 ) ## run Bootstrap 
    >>> print_bootstrap ( r , stats )                    ## print summary table 

    - `pdf`        : fit model
    - `data`       : original dataset
    - `size`       : number of datasets to sample
    - `fit_config` : configuration of `pdf.FitTo( data , ... )`
    - `fit_pars`   : redefine these parameters before each fit
    - `more_vars`  : calculate more variables from the fit-results
    - `fit_fun`    : specific fitting action (if needed)
    - `accept_fun` : specific accept action (if needed) 
    - `silent`     : silent processing?
    - `progress`   : show progress bar?
    - `logger`     : use this logger 
    - `frequency`  : how often to dump the intermediate results?
    """

    N = len(data)
    assert 1 < N, 'make_bootstrap: invalid dataset size %s' % N

    from ostap.core.ostap_types import integer_types
    assert isinstance ( size , integer_types ) and 0 < size, \
           "make_bootstrap: invalid ``size'' parameter %s" % size

    ## 1. fitting function?
    if fit_fun is None:
        if not silent:
            logger.info("make_bootstrap: use default ``make_fit'' function!")
        fit_fun = make_fit
    assert fit_fun and callable(fit_fun), 'Invalid fit function!'

    ## 2. accept function?
    if accept_fun is None:
        if not silent:
            logger.info("make_bootstrap: use default ``accept_fit'' function!")
        accept_fun = accept_fit
    assert accept_fun and callable(accept_fun), 'Invalid accept function!'

    if progress and not silent:
        assert isinstance ( frequency , integer_types ) and 0 < frequency,\
               "make_bootstrap: invalid ``frequency'' parameter %s" % frequency

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    ## parameters for fitting

    fparams = pdf.params()
    fix_fit_init = vars_transform(fparams)
    fix_fit_pars = vars_transform(fit_pars)

    fitcnf = {}
    fitcnf.update(fit_config)
    if not 'silent' in fitcnf: fitcnf['silent'] = silent

    from collections import defaultdict
    results = defaultdict(list)

    from ostap.core.core import SE
    fits = defaultdict(SE)  ## fit statuses
    covs = defaultdict(SE)  ## covariance matrix quality

    ## fit original dataset
    pdf.load_params(params=fix_fit_init, silent=silent)
    pdf.load_params(params=fix_fit_pars, silent=silent)
    r_tot = fit_fun(pdf, data, **fitcnf)

    from ostap.utils.progress_bar import progress_bar
    ## run bootstrapping
    for i, ds in progress_bar(enumerate(data.bootstrap(size)),
                              max_value=size,
                              silent=not progress):

        ## 2. reset parameters of fit_pdf
        pdf.load_params(params=fix_fit_init, silent=silent)
        pdf.load_params(params=fix_fit_pars, silent=silent)

        ## 3. fit it!
        r = fit_fun(pdf, ds, **fitcnf)

        ## 4. fit status
        fits[r.status()] += 1

        ## 5. covariance matrix quality
        covs[r.covQual()] += 1

        ## ok ?
        if accept_fun(r, pdf, ds):

            ## 6. save results
            rpf = r.params(float_only=True)
            for j in rpf:
                results[j].append(rpf[j][0])

            ## 7. more variables to be calculated?
            for v in more_vars:
                func = more_vars[v]
                results[v].append(func(r, pdf))

            results['#'].append(len(ds))
            results['#sumw'].append(ds.sumVar('1'))

        ds.clear()

        if progress or not silent:
            if 0 < frequency and 1 <= i and 0 == (i + 1) % frequency:
                stats = make_stats(results, fits, covs)
                ## print_stats ( stats , i + 1 , logger = logger )
                print_bootstrap(r_tot,
                                stats,
                                morevars=dict((k, more_vars[k](r_tot, pdf))
                                              for k in more_vars),
                                logger=logger)

    ## 8. make a final statistics
    stats = make_stats(results, fits, covs)

    if progress or not silent:

        ## 9. fit total dataset (twice)
        r_tot = fit_fun(pdf, data, **fitcnf)
        r_tot = fit_fun(pdf, data, **fitcnf)

        ## 10. the final table
        print_bootstrap(r_tot,
                        stats,
                        morevars=dict(
                            (k, more_vars[k](r_tot, pdf)) for k in more_vars),
                        logger=logger)

    return results, stats
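The essential ingredient is sampling the dataset with replacement; a plain-Python sketch of the idea behind `data.bootstrap(size)` (the real method yields RooFit datasets, and the "fit" here is replaced by a simple mean):

import random
import statistics

data = [random.gauss(0.0, 1.0) for _ in range(200)]  # stand-in for the dataset
size = 500                                           # number of bootstrap samples

means = []
for _ in range(size):
    sample = random.choices(data, k=len(data))       # resample with replacement
    means.append(statistics.mean(sample))            # the "fit" is just the mean here

print('bootstrap mean   : %+.4f' % statistics.mean(means))
print('bootstrap spread : %+.4f' % statistics.stdev(means))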
Example #22
def makePlots(the_func,
              particle,
              stripping,
              polarity,
              trackcuts,
              runMin=0,
              runMax=-1,
              verbose=True,
              maxFiles=-1,
              parallel=False):

    #**********************************************************************
    from PIDPerfScripts.DataFuncs import CheckStripVer, CheckMagPol, CheckPartType
    CheckStripVer(stripping)
    CheckMagPol(polarity)
    CheckPartType(particle)

    #======================================================================
    # Create dictionary holding:
    # - Reconstruction version    ['RecoVer']
    # - np.array of:
    #        - MagUp run limits   ['UpRuns']
    #        - MagDown run limits ['DownRuns']
    #======================================================================
    from PIDPerfScripts.DataFuncs import GetRunDictionary
    DataDict = GetRunDictionary(stripping, particle, verbose=verbose)

    if trackcuts and 0 < runMin: trackcuts += ' && runNumber>=%d ' % runMin
    if trackcuts and 0 < runMax: trackcuts += ' && runNumber<=%d ' % runMax

    #======================================================================
    # Determine min and max file indices
    #======================================================================
    if runMax < runMin: runMax = None
    from PIDPerfScripts.DataFuncs import GetMinMaxFileDictionary
    IndexDict = GetMinMaxFileDictionary(DataDict, polarity, runMin, runMax,
                                        maxFiles, verbose)

    #======================================================================
    # Append runNumber limits to TrackCuts
    #======================================================================

    logger.debug('Track Cuts: %s ' % trackcuts)

    #======================================================================
    # Declare default list of PID plots
    #======================================================================
    plots = []
    minEntries = 1000

    #======================================================================
    # Loop over all calibration subsamples
    #======================================================================

    mn = IndexDict['minIndex']
    mx = IndexDict['maxIndex']

    from ostap.utils.memory import memory
    from ostap.utils.utils import NoContext

    if parallel:

        logger.info('Parallel processing %d datafiles %s %s %s ' %
                    (mx - mn + 1, particle, stripping, polarity))
        task = PidCalibTask(the_func,
                            getconfig={
                                'particle': particle,
                                'stripping': stripping,
                                'polarity': polarity,
                                'trackcuts': trackcuts
                            },
                            verbose=False)

        from ostap.parallel.parallel import WorkManager
        wmgr = WorkManager(silent=False)

        wmgr.process(task, range(mn, mx + 1))
        return task.results()

    logger.info('Start the loop over %d datafiles %s %s %s ' %
                (mx - mn + 1, particle, stripping, polarity))

    from ostap.utils.progress_bar import progress_bar
    for index in progress_bar(xrange(mn, mx + 1)):

        manager = memory() if verbose else NoContext()
        with manager:

            dataset = getDataSet(particle,
                                 stripping,
                                 polarity,
                                 trackcuts,
                                 index,
                                 verbose=verbose)

            if not dataset: continue

            new_plots = the_func(particle, dataset, plots, verbose)

            if not plots: plots = new_plots
            else:
                for oh, nh in zip(plots, new_plots):
                    oh.Add(nh)

            dataset.reset()
            if dataset: del dataset

    return plots
Example #23
def make_toys(
        pdf,
        nToys,
        data,  ## template for dataset/variables 
        gen_config,  ## parameters for <code>pdf.generate</code>   
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        init_pars={},
        more_vars={},
        gen_fun=None,  ## generator function ( pdf , varset  , **config )
        fit_fun=None,  ## fit       function ( pdf , dataset , **config )
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset )
        silent=True,
        progress=True):
    """Make `nToys` pseudoexperiments

    -   Schematically:
    >>> for toy in range ( nToys )  :
    >>> ...  dataset = gen_fun ( pdf , ...     , **gen_config )
    >>> ...  result  = fit_fun ( pdf , dataset , **fit_config )
    >>> ...  if not accept_fun ( result , pdf , dataset ) : continue
    >>> .... < collect statistics here > 
    
    For each pseudoexperiment:

    1. generate dataset using `pdf` with variables specified
    in `data` and configuration specified via `gen_config`
    for each generation the parameters of `pdf` are reset
    to their initial values and to the values from `init_pars`
    
    2. fit generated dataset  with `pdf` using configuration
    specified via  `fit_config`

    - pdf        PDF to be used for generation and fitting
    - nToys      number    of pseudoexperiments to generate
    - data       variable list of variables to be used for dataset generation
    - gen_config configuration of <code>pdf.generate</code>
    - fit_config configuration of <code>pdf.fitTo</code>
    - init_pars  redefine these parameters for each pseudoexperiment
    - more_vars  dictionary of functions to define the additional results
    - gen_fun    generator function
    - fit_fun    fitting   function
    - accept_fun accept    function
    - silent     silent toys?
    - progress   show progress bar? 
    
    It returns a dictionary with fit results for the toys and a dictionary of statistics
    
    >>> pdf = ...
    ... results, stats = make_toys ( pdf     , ## PDF  to use 
    ...                 1000                 , ## number of toys 
    ...                 [ 'mass' ]           , ## variables in dataset 
    ...                 { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...                 { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...                 { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation 
    ...                )

    Derived parameters can be also retrieved via <code>more_vars</code> argument:
    >>> ratio    = lambda res,pdf : res.ratio('x','y') 
    >>> more_vars = { 'Ratio' : ratio }
    >>> r,  s = make_toys ( .... , more_vars = more_vars , ... ) 

    - If `gen_fun`    is not specified `generate_data` is used 
    - If `fit_fun`    is not specified `make_fit`      is used 
    - If `accept_fun` is not specified `accept_fit`    is used 
    """

    from ostap.core.ostap_types import string_types, integer_types

    assert isinstance ( nToys , integer_types ) and 0 < nToys,\
           'Invalid "nToys" argument %s/%s' % ( nToys , type ( nToys ) )

    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % gen_config

    ## 1. generator function?
    if gen_fun is None:
        if not silent:
            logger.info("make_toys: use default ``generate_data'' function!")
        gen_fun = generate_data
    assert gen_fun and callable(gen_fun), 'Invalid generator function!'

    ## 2. fitting function?
    if fit_fun is None:
        if not silent:
            logger.info("make_toys: use default ``make_fit'' function!")
        fit_fun = make_fit
    assert fit_fun and callable(fit_fun), 'Invalid fit function!'

    ## 3. accept function?
    if accept_fun is None:
        if not silent:
            logger.info("make_toys: use default ``accept_fit'' function!")
        accept_fun = accept_fit
    assert accept_fun and callable(accept_fun), 'Invalid accept function!'

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    params = pdf.params()
    varset = ROOT.RooArgSet()

    if isinstance(data, ROOT.RooAbsData): varset = data.varset()
    else:
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                varset.add(v)
            elif isinstance(v, string_types) and v in params:
                varset.add(params[v])
            else:
                raise TypeError('Invalid variable %s/%s' % (v, type(v)))

    fix_pars = vars_transform(params)
    fix_init = vars_transform(init_pars)

    pdf.load_params(params=fix_pars, silent=silent)
    pdf.load_params(params=fix_init, silent=silent)

    ## save all initial parameters (needed for the final statistics)
    params = pdf.params()
    fix_all = vars_transform(params)

    fitcnf = {}
    fitcnf.update(fit_config)
    if not 'silent' in fitcnf: fitcnf['silent'] = silent

    from collections import defaultdict
    results = defaultdict(list)

    from ostap.core.core import SE, VE

    fits = defaultdict(SE)  ## fit statuses
    covs = defaultdict(SE)  ## covariance matrix quality

    ## run pseudoexperiments
    from ostap.utils.progress_bar import progress_bar
    for i in progress_bar(range(nToys), silent=not progress):

        ## 1. reset PDF parameters
        pdf.load_params(params=fix_pars, silent=silent)
        pdf.load_params(params=init_pars, silent=silent)

        ## 2. generate dataset!
        ## dataset = pdf.generate ( varset = varset , **gen_config )
        dataset = gen_fun(pdf, varset=varset, **gen_config)
        if not silent:
            logger.info('Generated dataset #%d\n%s' % (i, dataset))

        ## 3. fit it!
        r = fit_fun(pdf, dataset, **fitcnf)
        if not silent:
            logger.info('Fit result #%d\n%s' %
                        (i, r.table(title='Fit result #%d' % i, prefix='# ')))

        ## fit status
        fits[r.status()] += 1

        ## covariance matrix quality
        covs[r.covQual()] += 1

        ## ok ?
        if accept_fun(r, pdf, dataset):

            ## 4. save results
            rpf = r.params(float_only=True)
            for p in rpf:
                results[p].append(rpf[p][0])

            for v in more_vars:
                func = more_vars[v]
                results[v].append(func(r, pdf))

            results['#'].append(len(dataset))

        dataset.clear()
        del dataset
        del r

    ## make a final statistics
    stats = defaultdict(SE)

    for par in results:
        pars = results[par]
        mvar = par in more_vars
        if not mvar: a0 = fix_all.get(par, None)
        for v in pars:
            v0 = float(v)
            stats[par] += v0
            if not mvar and not a0 is None and isinstance(
                    v, VE) and 0 < v.error():
                stats['pull:%s' % par] += (v0 - a0) / v.error()

    for k in fits:
        stats['- Status  %s' % k] = fits[k]
    for k in covs:
        stats['- CovQual %s' % k] = covs[k]

    if progress or not silent: print_stats(stats, nToys)

    return results, stats
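The `pull:*` entries collected above use the standard definition (fitted value minus injected value, divided by the fitted error); for an unbiased fit with correct errors the pulls should have mean close to 0 and rms close to 1. A toy sketch with hypothetical generated values, not RooFit results:

import random
import statistics

true_value, fitted_error = 1.0, 0.1
pulls = []
for _ in range(1000):
    fitted = random.gauss(true_value, fitted_error)      # pretend fit result
    pulls.append((fitted - true_value) / fitted_error)   # pull definition as above

print('pull mean : %+.3f (expect ~0)' % statistics.mean(pulls))
print('pull rms  : %+.3f (expect ~1)' % statistics.stdev(pulls))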
Example #24
def make_toys2(
        gen_pdf,  ## pdf to generate toys 
        fit_pdf,  ## pdf to fit  
        nToys,  ## number of pseudoexperiments 
        data,  ## template for dataset/variables 
        gen_config,  ## parameters for <code>pdf.generate</code>   
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        gen_pars={},  ## gen-parameters to reset/use 
        fit_pars={},  ## fit-parameters to reset/use
        more_vars={},  ## additional  results to be calculated  
        silent=True,
        progress=True):
    """Make `ntoys` pseudoexperiments
    
    For each experiment:

    1. generate dataset using `pdf` with variables specified
    in `data` and configuration specified via `gen_config`
    for each generation the parameters of `pdf` are reset
    to their initial values and to the values from `gen_pars`
    
    2. fit generated dataset  with `pdf` using configuration
    specified via  `fit_config`

    - pdf        PDF to be used for generation and fitting
    - nToys      number    of pseudoexperiments to generate
    - data       variable list of variables to be used for dataset generation
    - gen_config configuration of <code>pdf.generate</code>
    - fit_config configuration of <code>pdf.fitTo</code>
    - gen_pars   redefine these parameters for generation of each pseudoexperiment
    - fit_pars   redefine these parameters for fit of each pseudoexperiment
    - silent     silent toys?
    - progress  show progress bar? 
    
    It returns a dictionary with fit results for the toys
    
    >>> pdf = ...
    ... results, stats = make_toys ( pdf     , ## PDF  to use 
    ...                 1000                 , ## number of toys 
    ...                 [ 'mass' ]           , ## variables in dataset
    ...                 { 'nEvents' : 5000 } , ## configuration of `pdf.generate`
    ...                 { 'ncpus'   : 2    } , ## configuration of `pdf.fitTo`
    ...                 { 'mean' : 0.0 , 'sigma' : 1.0 } ## parameters to use for generation 
    ...                )
    """

    from ostap.core.ostap_types import string_types, integer_types

    assert isinstance ( nToys , integer_types ) and 0 < nToys,\
           'Invalid "nToys" argument %s/%s' % ( nToys , type ( nToys ) )

    assert gen_config and 'nEvents' in gen_config,\
           'Number of events per toy must be specified via "gen_config" %s' % gen_config

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    gparams = gen_pdf.params()
    varset = ROOT.RooArgSet()

    if isinstance(data, ROOT.RooAbsData): varset = data.varset()
    else:
        for v in data:
            if isinstance(v, ROOT.RooAbsArg):
                varset.add(v)
            elif isinstance(v, string_types) and v in gparams:
                varset.add(gparams[v])
            else:
                raise TypeError('Invalid variable %s/%s' % (v, type(v)))

    ## parameters for generation

    fix_gen_init = vars_transform(gparams)
    fix_gen_pars = vars_transform(gen_pars)

    ## parameters for fitting

    fparams = fit_pdf.params()
    fix_fit_init = vars_transform(fparams)
    fix_fit_pars = vars_transform(fit_pars)

    fitcnf = {}
    fitcnf.update(fit_config)
    if not 'silent' in fitcnf: fitcnf['silent'] = silent

    from collections import defaultdict
    results = defaultdict(list)

    ## run pseudoexperiments
    from ostap.utils.progress_bar import progress_bar
    for i in progress_bar(range(nToys), silent=not progress):

        ## 1. reset PDF parameters
        gen_pdf.load_params(None, fix_gen_init, silent=silent)
        gen_pdf.load_params(None, fix_gen_pars, silent=silent)

        ## 2. generate dataset!
        dataset = gen_pdf.generate(varset=varset, **gen_config)
        if not silent:
            logger.info('Generated dataset #%d\n%s' % (i, dataset))

        ## 3. reset parameters of fit_pdf
        fit_pdf.load_params(None, fix_fit_init, silent=silent)
        fit_pdf.load_params(None, fix_fit_pars, silent=silent)

        ## 4. fit it!
        r, _ = fit_pdf.fitTo(dataset, **fitcnf)
        if not silent:
            logger.info('Fit result #%d\n%s' %
                        (i, r.table(title='Fit result #%d' % i, prefix='# ')))

        ## skip invalid fits
        if r.status(): continue

        ## 5. save results
        rpf = r.params(float_only=True)
        for p in rpf:
            results[p].append(rpf[p][0])

        for v in more_vars:
            func = more_vars[v]
            results[v].append(func(r, fit_pdf))

        dataset.clear()
        del dataset

    ## make a final statistics
    from ostap.core.core import SE
    stats = defaultdict(SE)

    for par in results:
        pars = results[par]
        for v in pars:
            v0 = float(v)
            stats[par] += v0

    if progress or not silent: print_stats(stats, nToys)

    return results, stats

Example #25
hl = ROOT.TH1F("hl", "length - z", 200, -0.01, 2.9)
hn = ROOT.TH1F("hn", "length - z", 200, -0.01, 2.9)
hn.SetLineColor(2)

hr = ROOT.TH1F("hr", "dr", 500, -0.01, 0.99)
ht = ROOT.TH1F("ht", "dr", 500, -0.01, 0.99)
ht.SetLineColor(2)

h_dir = ROOT.TH2F("h_dir", ";#phi_{in};#phi_{shift}", 100, -pi, pi, 100, -pi,
                  pi)

vdir = ROOT.TVector3()

for ev in progress_bar(range(330)):
    phi = 2. * pi * (random() - 0.5)
    inX = inR * cos(phi)
    inY = inR * sin(phi)
    pos, track_length = trace(inX, inY, inZ)
    outX, outY = pos.x(), pos.y()
    dr = sqrt((outX - inX)**2 + (outY - inY)**2)
    vdir.SetXYZ(outX - inX, outY - inY, 0.)
    h_dir.Fill(phi, vdir.Phi())
    #print( str(outX) + "  " +str(inX) + "  "+ str(outY) + "  "+str(inY) + "     "+str(phi))
    hl.Fill(1000. * (track_length - inZ))
    hr.Fill(dr)
    pos, track_length = trace(inX, inY, inZ, field=(False, False, True))
    outX, outY = pos.x(), pos.y()
    dr = sqrt((outX - inX)**2 + (outY - inY)**2)
    hn.Fill(1000. * (track_length - inZ))
Example #26
# Fit
r, w = model.fitTo(ds, draw=True, silent=True)
nll_obs = r.minNll()
#help(r)
print(r)
print(r("mean_sig"))
w.Draw()
canvas >> "MAIN"
#
Ntoys = 1000
toy_ds_list = []
for toy in range(Ntoys):
    toy_ds_list.append(model.generate(250))
#
nll_list = []
less = 0.
more = 0.
for toy in progress_bar(range(Ntoys)):
    r, w = model.fitTo(toy_ds_list[toy], draw=True, silent=True)
    nll_list.append(r.minNll())
    if r.minNll() < nll_obs:
        less += 1.
    else:
        more += 1.
#
w.Draw()
canvas >> "TEMP"
#
print("Less: " + str(100. * less / Ntoys) + " %")
print("More: " + str(100. * more / Ntoys) + " %")
Example #27
def make_jackknife(
        pdf,
        data,
        fit_config={},  ## parameters for <code>pdf.fitTo</code>
        fit_pars={},  ## fit-parameters to reset/use
        more_vars={},  ## additional  results to be calculated
        fit_fun=None,  ## fit       function ( pdf , dataset , **fit_config ) 
        accept_fun=None,  ## accept    function ( fit-result, pdf, dataset     )
        event_range=(),  ## event range for jackknife                      
        silent=True,
        progress=True,
        logger=logger,
        frequency=100):
    """Run Jackknife analysis, useful for evaluaton of fit biased and uncertainty estimates
    For each <code>i</code> remove event with index <code>i</code> from the dataset, and refit it.
    >>> dataset = ...
    >>> model   = ...
    >>> r , f = model.fitTo ( dataset , .... )           ## fit the whole dataset   
    >>> results, stats = make_jackknife ( model , data ) ## run Jackknife 
    >>> print_jackknife ( r , stats )                    ## print summary table 
    - see https://en.wikipedia.org/wiki/Jackknife_resampling
    - see print_jackknife 
    - see jackknife_statistics


    - `pdf`         : fit model
    - `data`        : original dataset
    - `fit_config`  : configuration of `pdf.FitTo( data , ... )`
    - `fit_pars`    : redefine these parameters before each fit
    - `more_vars`   : calculate more variables from the fit-results
    - `fit_fun`     : specific fitting action (if needed)
    - `accept_fun`  : specific accept action (if needed)
    - `event_range` : event range to use for jackknife   
    - `silent`      : silent processing?
    - `progress`    : show progress bar?
    - `logger`      : use this logger 
    - `frequency`   : how often to dump the intermediate results ? 
    """

    N = len(data)
    assert 1 < N, 'make_jackknife: invalid dataset size %s' % N

    if not event_range: event_range = 0, N
    assert 2 == len(
        event_range
    ), 'make_jackknife: invalid event range %s ' % str(event_range)

    begin, end = event_range

    ## check begin/end range
    assert 0 <= begin and begin < end and begin < N, 'make_jackknife: invalid event range (%s,%s)/%d' % (
        begin, end, N)
    ## adjust the end
    end = min(end, N)

    ## 1. fitting function?
    if fit_fun is None:
        if not silent:
            logger.info("make_jackknife: use default ``make_fit'' function!")
        fit_fun = make_fit
    assert fit_fun and callable(fit_fun), 'Invalid fit function!'

    ## 2. accept function?
    if accept_fun is None:
        if not silent:
            logger.info("make_jackknife: use default ``accept_fit'' function!")
        accept_fun = accept_fit
    assert accept_fun and callable(accept_fun), 'Invalid accept function!'

    from ostap.core.ostap_types import integer_types
    if progress and not silent:
        assert isinstance ( frequency , integer_types ) and 0 < frequency,\
               "make_jackknife: invalid ``frequency'' parameter %s" % frequency

    import ostap.fitting.roofit
    import ostap.fitting.dataset
    import ostap.fitting.variables
    import ostap.fitting.roofitresult
    import ostap.fitting.basic

    ## parameters for fitting

    fparams = pdf.params()
    fix_fit_init = vars_transform(fparams)
    fix_fit_pars = vars_transform(fit_pars)

    fitcnf = {}
    fitcnf.update(fit_config)
    if not 'silent' in fitcnf: fitcnf['silent'] = silent

    from collections import defaultdict
    results = defaultdict(list)

    from ostap.core.core import SE
    fits = defaultdict(SE)  ## fit statuses
    covs = defaultdict(SE)  ## covariance matrix quality

    ## Fit the whole sample
    pdf.load_params(params=fix_fit_init, silent=silent)
    pdf.load_params(params=fix_fit_pars, silent=silent)
    r_tot = fit_fun(pdf, data, **fitcnf)

    from ostap.utils.progress_bar import progress_bar
    ## run jackknife resampling
    for i, ds in progress_bar(enumerate(data.jackknife(begin, end)),
                              max_value=end - begin,
                              silent=not progress):

        ## 2. reset parameters of fit_pdf
        pdf.load_params(params=fix_fit_init, silent=silent)
        pdf.load_params(params=fix_fit_pars, silent=silent)

        ## 3. fit it!
        r = fit_fun(pdf, ds, **fitcnf)

        ## 4. fit status
        fits[r.status()] += 1

        ## 5. covariance matrix quality
        covs[r.covQual()] += 1

        ## ok ?
        if accept_fun(r, pdf, ds):

            ## 6. save results
            rpf = r.params(float_only=True)
            for j in rpf:
                results[j].append(rpf[j][0])

            ## 7. more variables to be calculated?
            for v in more_vars:
                func = more_vars[v]
                results[v].append(func(r, pdf))

            results['#'].append(len(ds))
            results['#sumw'].append(ds.sumVar('1'))

        ds.clear()

        if progress or not silent:
            if 0 < frequency and 1 <= i and 0 == (i + 1) % frequency:
                stats = make_stats(results, fits, covs)
                print_stats(stats, i + 1, logger=logger)

    ## 8. make a final statistics
    stats = make_stats(results, fits, covs)

    if progress or not silent:

        ## 9. fit total dataset (twice)
        r_tot = fit_fun(pdf, data, **fitcnf)
        r_tot = fit_fun(pdf, data, **fitcnf)

        ## 10. the final table
        print_jackknife(r_tot,
                        stats,
                        morevars=dict(
                            (k, more_vars[k](r_tot, pdf)) for k in more_vars),
                        logger=logger)

    return results, stats
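The resampling scheme itself is plain leave-one-out; a small sketch of the idea behind `data.jackknife(begin, end)` (the real method yields RooFit datasets, here ordinary lists and a simple mean are used instead):

import random
import statistics

data = [random.gauss(0.0, 1.0) for _ in range(100)]  # stand-in for the dataset

def jackknife_samples(values):
    """Yield copies of `values` with one entry removed at a time."""
    for i in range(len(values)):
        yield values[:i] + values[i + 1:]

estimates = [statistics.mean(sample) for sample in jackknife_samples(data)]
print('jackknife mean   : %+.4f' % statistics.mean(estimates))
print('jackknife spread : %+.4f' % statistics.stdev(estimates))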
Example #28
            from array import array
            var1 = array('d', [0])
            var2 = array('d', [0])
            var3 = array('d', [0])

            treeSignal.Branch('var1', var1, 'var1/D')
            treeSignal.Branch('var2', var2, 'var2/D')
            treeSignal.Branch('var3', var3, 'var3/D')

            treeBkg.Branch('var1', var1, 'var1/D')
            treeBkg.Branch('var2', var2, 'var2/D')
            treeBkg.Branch('var3', var3, 'var3/D')

            ## fill background tuple:
            for i in progress_bar(range(nB)):
                ## for i in range ( nB ) :

                x = random.uniform(-2.0, 2.0)
                y = random.uniform(-2.0, 2.0)
                z = random.gauss(.0, 0.5)

                var1[0] = x + 0.1 * y
                var2[0] = x - 0.1 * y
                var3[0] = -x + z

                treeBkg.Fill()

            ## fill signal tuple:
            for i in progress_bar(range(nS)):
                ## for i in range ( nS ) :
Example #29
    def copy_files ( self , new_dir , parallel = False ) :
        """copy all the files to new directory
        - new directory will be created (if needed)
        - common path (prefix) for all files will be replaced by new directory
        """
        
        from ostap.utils.basic  import writeable,    copy_file
        from ostap.io.root_file import copy_file as copy_root_file 

        ## create directory if needed 
        if not os.path.exists ( new_dir ) : os.makedirs ( new_dir )
        
        assert writeable ( new_dir ), \
               "New directory ``%s'' is not writable!" % new_dir 

        nd = os.path.abspath  ( new_dir )
        nd = os.path.normpath ( nd      ) 
        nd = os.path.realpath ( nd      )
        
        cp = self.commonpath

        
        if parallel :

            regular_files  = [] 
            root_files     = [] 
            for f in self.__files :
                fs = os.path.normpath ( strip_protocol ( f ) ) 
                nf = fs.replace ( cp , nd ) 
                nf = os.path.normpath ( nf )
                pair = f , nf 
                if has_protocol ( f ) : root_files   .append ( pair )
                else                  : regular_files.append ( pair ) 
                
            from ostap.parallel.parallel_copy import copy_files as parallel_copy
            copied1 = []
            copied2 = []
            
            if regular_files :
                copied1 = parallel_copy ( regular_files , maxfiles = 1 , copier = copy_file      , progress = not self.silent )
            if root_files :
                copied2 = parallel_copy (    root_files , maxfiles = 1 , copier = copy_root_file , progress = not self.silent )
                
            copied  = [ d[1] for d in copied1 ] + [ d[1] for d in copied2 ] 

        else :
            
            copied = []
            from ostap.utils.progress_bar import progress_bar
            nfiles = len ( self.__files )
            for f in progress_bar ( self.__files , silent = self.silent or nfiles <= 1 ) :
                
                fs = os.path.normpath ( strip_protocol ( f ) ) 
                nf = fs.replace ( cp , nd ) 
                nf = os.path.normpath ( nf )
                
                if not has_protocol ( f ) :
                    result = copy_file      ( f , nf , progress = ( 1 == nfiles ) and self.verbose ) 
                else                      :
                    result = copy_root_file ( f , nf , progress = ( 1 == nfiles ) and self.verbose ) 
                    
                copied.append ( result )
                
        copied = tuple ( copied )
        return self.clone ( files = copied , patterns = () )