Beispiel #1
0
    def process(self, item):
        """Project one work item into a copy of the template histogram

        ``item'' is expected to provide three attributes:
        - ``chain''   : the tree/chain to be projected
        - ``first''   : the first entry in the tree to process
        - ``nevents'' : the number of entries to process

        The expression and the selection are taken from ``self.what''
        and ``self.cuts''.  Nothing is returned: the result of the
        projection is stored in ``self.output''.
        """

        import ROOT
        from ostap.logger.utils import logWarning
        with logWarning():
            import ostap.core.pyrouts

        import ostap.trees.trees
        from ostap.trees.trees import _tt_project_

        ## what to process: the tree, the first entry and the number of entries
        tree, start, count = item.chain, item.first, item.nevents

        ## the placeholder result: nothing processed yet + clone of the template
        self.output = 0, self.histo.Clone()

        ## delegate to the regular (sequential) projection
        self.output = _tt_project_(
            tree, self.output[1], self.what, self.cuts, '', count, start)
        del item
Beispiel #2
0
    def process(self, jobid, item):
        """Project one chunk of data into a copy of the template histogram

        - ``jobid'' : the job identifier, not used in the body
        - ``item''  : the description of the chunk; its ``name'', ``files'',
                      ``first'' and ``nevents'' attributes are used to
                      rebuild the ``Chain'' to be processed

        The expression and the selection are taken from ``self.what''
        and ``self.cuts''.  The result of the projection is stored in
        ``self.__output'' and returned.
        """

        import ROOT
        from ostap.logger.utils import logWarning
        with logWarning():
            import ostap.core.pyrouts
            import ostap.trees.trees
            import ostap.histos.histos
            import ostap.frames.frames
            from ostap.trees.trees import Chain, Tree

        ## rebuild the chain from the description of the chunk
        chunk = Chain(name=item.name,
                      files=item.files,
                      first=item.first,
                      nevents=item.nevents)

        tree, start, count = chunk.chain, chunk.first, chunk.nevents

        from ostap.trees.trees import _tt_project_
        from ostap.core.core import ROOTCWD

        with ROOTCWD():

            ## clone the template histogram after changing to gROOT
            ROOT.gROOT.cd()
            clone = self.histo.Clone()

            ## the placeholder result: nothing processed yet
            self.__output = 0, clone

            ## use the regular projection
            self.__output = _tt_project_(tree=tree,
                                         histo=clone,
                                         what=self.what,
                                         cuts=self.cuts,
                                         options='',
                                         nentries=count,
                                         firstentry=start)
        del item

        return self.__output
Beispiel #3
0
def cproject(chain, histo, what, cuts):
    """Make a projection of the loooong chain into histogram
    >>> chain = ... ## large chain
    >>> histo = ... ## histogram template
    >>> cproject       ( chain , histo , 'mass' , 'pt>10' )
    >>> chain.pproject ( histo , 'mass' , 'pt>0' ) ## ditto
    >>> chain.cproject ( histo , 'mass' , 'pt>0' ) ## ditto
    For 12-core machine, clear speedup factor of about 8 is achieved

    Arguments:
    - chain  the chain to be projected; for anything but ROOT.TChain
             the regular projection is used without parallelization
    - histo  the histogram; it is reset before being filled
    - what   variable/expression to be projected: a list of expressions,
             a ROOT.TCut, or a string where several expressions are
             separated by ',' or ';'
    - cuts   selection/weighting criteria (string or ROOT.TCut)

    Returns the tuple ( filtered , histo ); ( 0 , histo ) is returned when
    the chain or the histogram is invalid or there is nothing to project.
    """
    #
    if not chain:
        return 0, histo
    if not histo:
        logger.error('cproject: invalid histogram')
        return 0, histo

    import ROOT
    histo.Reset()

    if not isinstance(chain, ROOT.TChain):
        logger.warning(
            'cproject method is TChain-specific, skip parallelization')
        from ostap.trees.trees import _tt_project_
        return _tt_project_(chain, histo, what, cuts)

    if isinstance(cuts, ROOT.TCut): cuts = str(cuts)
    if isinstance(what, ROOT.TCut): what = str(what)

    ## Split on both separators in one go: once the string has been split
    ## on ',' the result is a list, so a subsequent "if it is a string,
    ## split on ';'" step can never fire.
    if isinstance(what, str):
        pieces = [w.strip() for part in what.split(',')
                  for w in part.split(';')]
        what = [w for w in pieces if w]

    ## nothing to project
    if not what:
        return 0, histo

    ## NOTE(review): presumably this import is what provides chain.files() - confirm
    import ostap.trees.trees
    files = chain.files()

    cname = chain.GetName()

    ## one job per (file, expression) pair
    params = [(f, cname, str(w), cuts) for f in files for w in what]

    task = ProjectTask(histo)
    wmgr = Parallel.WorkManager()
    wmgr.process(task, params)

    filtered = task.output[0]
    histo += task.output[1]

    return filtered, histo
Beispiel #4
0
    def process(self, params):
        """The actual processing
        ``params'' is assumed to be a tuple-like entity:
        - the file name (or ROOT.TChainElement, its title is used)
        - the tree name in the file
        - the variable/expression/expression list of quantities to project
        - the selection/weighting criteria    (optional, '' if missing)
        - the first entry in tree to process  (optional, 0 if missing)
        - number of entries to process        (optional, n_large if missing)

        The first three items are mandatory: a bare file name, a bare
        ROOT.TChainElement or a shorter sequence can not be processed
        and TypeError is raised for them.

        Nothing is returned: the result of the projection is stored
        in ``self.output''.
        """

        ## A bare file name provides neither the tree name nor the expression
        ## to project. Earlier code substituted (file, 0, n_large), which put
        ## 0 and n_large into the tree-name and expression slots (and raised
        ## NameError for strings because of a misspelled variable).
        if isinstance(params, str):
            raise TypeError(
                'process: (file, tree, what, ...) is expected, got bare file name')

        import ROOT
        from ostap.logger.utils import logWarning
        with logWarning():
            import ostap.core.pyrouts

        if isinstance(params, ROOT.TChainElement):
            raise TypeError(
                'process: (file, tree, what, ...) is expected, got bare TChainElement')

        nparams = len(params)
        if nparams < 3:
            raise TypeError(
                'process: (file, tree, what, ...) is expected, got %d item(s)' %
                nparams)

        fname = params[0]  ## file name
        tname = params[1]  ## tree name
        what = params[2]  ## variable/expression to project
        cuts = params[3] if 3 < nparams else ''  ## cuts
        first = params[4] if 4 < nparams else 0  ## the first event
        nentries = params[5] if 5 < nparams else n_large  ## number of events

        if isinstance(fname, ROOT.TChainElement): fname = fname.GetTitle()

        ## single-file chain for this job
        chain = ROOT.TChain(tname)
        chain.Add(fname)

        ## Create the output histogram   NB! (why here???)
        self.output = 0, self.histo.Clone()

        ## use the regular projection
        from ostap.trees.trees import _tt_project_
        self.output = _tt_project_(chain, self.output[1], what, cuts, '',
                                   nentries, first)
        del chain
        del chain
Beispiel #5
0
def tproject(
        tree,  ## the tree
        histo,  ## histogram
        what,  ## variable/expression/list to be projected
        cuts='',  ## selection/weighting criteria
        nentries=-1,  ## number of entries
        first=0,  ## the first entry
        maxentries=1000000):  ## chunk size
    """Make a projection of the loooong tree into histogram
    >>> tree  = ... ## large chain
    >>> histo = ... ## histogram template
    >>> tproject ( tree , histo , 'mass' , 'pt>10' )
    >>> tree.pproject ( histo , 'mass' , 'pt>10' )    ## ditto
    - significant gain can be achieved for very large ttrees with complicated expressions and cuts
    - maxentries parameter should be rather large
    Arguments:
    - tree       the tree
    - histo      the histogram; it is reset before being filled
    - what       variable/expression/varlist to be projected: a list of
                 expressions, a ROOT.TCut, or a string where several
                 expressions are separated by ',' or ';'
    - cuts       selection/weighting criteria (string or ROOT.TCut)
    - nentries   number of entries to process  (<0: all entries in the tree)
    - first      the first entry to process    (<0 is treated as 0)
    - maxentries chunk size for parallel processing (<=0: no chunking)

    Returns the tuple ( filtered , histo ); ( 0 , histo ) is returned when
    the tree or the histogram is invalid or there is nothing to project.

    The regular projection is used without parallelization when
    the tree is a multi-file chain, when the tree is not file resident,
    or when the range is short: less than maxentries entries in total
    for fewer than four expressions.
    """
    if not tree:
        return 0, histo
    if not histo:
        logger.error('tproject: invalid histogram')
        return 0, histo

    import ROOT
    histo.Reset()

    num = len(tree)
    if num <= first:
        return 0, histo

    if 0 > nentries: nentries = n_large

    ## int() has no upper limit in Python 3 and is promoted automatically
    ## in Python 2, so the Python-2-only long() is not needed
    maxentries = int(maxentries)
    if 0 >= maxentries: maxentries = n_large

    if 0 > first: first = 0

    ## total number of events to process
    ## (needed by every fallback below, so it is computed up front)
    total = min(num - first, nentries)

    ## use the regular projection
    from ostap.trees.trees import _tt_project_

    fname = None
    tname = None

    if isinstance(tree, ROOT.TChain):

        files = tree.files()
        if 1 != len(files):
            logger.warning(
                "``tproject'' method is TTree-specific, skip parallelization")
            return _tt_project_(tree, histo, what, cuts, '', nentries, first)

        fname = files[0]
        tname = tree.GetName()

    else:

        tdir = tree.GetDirectory()
        ## a tree without directory is not file resident either
        ftree = tdir.GetFile() if tdir else None
        if not ftree:
            logger.debug('TTree is not file resident, skip parallelization')
            return _tt_project_(tree, histo, what, cuts, '', total, first)
        fname = ftree.GetName()
        ## keep the part of the directory path after the last ':'
        tpath = tdir.GetPath()
        pr, d, path = tpath.rpartition(':')
        tname = path + '/' + tree.GetName()

    if not fname:
        logger.info("Can't determine fname, skip parallelization")
        return _tt_project_(tree, histo, what, cuts, '', total, first)

    if not tname:
        logger.info("Can't determine tname, skip parallelization")
        return _tt_project_(tree, histo, what, cuts, '', total, first)

    #
    if isinstance(cuts, ROOT.TCut): cuts = str(cuts)
    if isinstance(what, ROOT.TCut): what = str(what)

    ## Split on both separators in one go: once the string has been split
    ## on ',' the result is a list, so a subsequent "if it is a string,
    ## split on ';'" step can never fire.
    if isinstance(what, str):
        pieces = [w.strip() for part in what.split(',')
                  for w in part.split(';')]
        what = [w for w in pieces if w]

    ## nothing to project
    if not what:
        return 0, histo

    ## the event range is rather short, no real need  in parallel processing
    if total * len(what) < maxentries and len(what) < 4:
        return _tt_project_(tree, histo, what, cuts, '', total, first)

    ## final list of parameters
    ## [ (file_name, tree_name, what, cuts, first_event, num_events) , ... ]
    ## The range [first, first + total) is cut into consecutive chunks of
    ## at most maxentries entries, so every entry is visited exactly once.
    params = []
    last = first + total
    start = first
    while start < last:
        size = min(maxentries, last - start)
        for w in what:
            params.append((fname, tname, str(w), cuts, start, size))
        start += size

    ## empty range: nothing to submit
    if not params:
        return 0, histo

    task = ProjectTask(histo)
    wmgr = Parallel.WorkManager()
    wmgr.process(task, params)

    filtered = task.output[0]
    histo += task.output[1]

    return filtered, histo