Example #1
0
  def __init__(self, d=None, **kw):
    """
      Cluster finder class based on the following parameters:
        code_book: centroids of the clusters given by any algorithm (e.g. kmeans)
        w        : weights; each one multiplies the size of its cluster,
                   e.g. if a cluster was found with 100 events and its w factor
                   is 2, the events in the cluster are duplicated up to 200.
                   Either a single int (applied to every centroid), a list or
                   an array with one entry per centroid.
        matrix   : projection applied on the centroids (not read in this
                   method; presumably consumed by Subset.__init__ -- TODO confirm)
        p_cluster: cluster target for each neuron map

      The parameters may be given through the dictionary `d`, as keywords, or
      both; keywords override entries of `d`. A dictionary supplied by the
      caller is updated and consumed in place.

      Raises ValueError if the number of weights differs from the number of
      centroids.
    """
    # Use a fresh dict per call: a `{}` default is created once and shared by
    # every call, and it is mutated below (update/pop).
    if d is None:
      d = {}
    d.update( kw ); del kw
    Subset.__init__(self,d) 

    self._code_book = d.pop('code_book', [])
    self._p_cluster = d.pop('p_cluster', [])
    self._w         = d.pop('w'  , 1   )
    checkForUnusedVars(d, self._warning )  
    del d
    # Some protections before start
    if isinstance(self._code_book, list):
      self._code_book = npCurrent.array(self._code_book)
    # If the weight factor is an integer, expand it to an array of factors with
    # the same size as the centroids
    if isinstance(self._w, int):
      self._w = npCurrent.int_array( [self._w] * self._code_book.shape[0] )
    # transform to np.array if needed
    if isinstance(self._w, list):
      self._w = npCurrent.int_array(self._w)
    # When a list of weights is given, weights and centroids must have the same length.
    if self._w.shape[0] != self._code_book.shape[0]:
      raise ValueError("Weight factor must be an int, list or np.array with the same size than the code book param")
Example #2
0
 def __init__(self, w, b, **kwargs):
   """
   Store the weights and bias of one layer.

   Arguments:
     - w: weights, stored in self.W as np.matrix(w)
     - b: bias, stored in self.b as the transpose of np.matrix(b)
   Optional keyword arguments:
     - Layer [0]: stored in self.layer
     - funcName [None]: stored in self.funcName
   Remaining keywords are handed to checkForUnusedVars.
   """
   Logger.__init__(self)
   # Consume the known keywords first, then report whatever is left over
   layer_index = retrieve_kw(kwargs, 'Layer', 0)
   function_name = retrieve_kw(kwargs, 'funcName', None)
   self.layer = layer_index
   self.funcName = function_name
   checkForUnusedVars(kwargs)
   weight_matrix = np.matrix(w)
   bias_matrix = np.matrix(b)
   self.W = weight_matrix
   self.b = np.transpose(bias_matrix)
Example #3
0
 def __init__(self, w, b, **kw):
     """
     Store the weights and bias of one layer.

     Arguments:
       - w: weights, stored in self.W as np.matrix(w)
       - b: bias, stored in self.b as the transpose of np.matrix(b)
     Optional keyword arguments:
       - Layer [0]: stored in self.layer
       - Func ["tansig"]: stored in self.func
     Remaining keywords are reported through the logger warning method.
     """
     Logger.__init__(self, kw)
     layer_index = kw.pop("Layer", 0)
     function_name = kw.pop("Func", "tansig")
     self.layer = layer_index
     self.func = function_name
     checkForUnusedVars(kw, self._logger.warning)
     del kw
     weight_matrix = np.matrix(w)
     bias_matrix = np.matrix(b)
     self.W = weight_matrix
     self.b = np.transpose(bias_matrix)
Example #4
0
 def __init__( self, **kw ):
   """
   Configure the tuning core selected by retrieve_core() and initialize the
   data, handler and target holders with empty values.

   Optional keyword arguments (default in brackets):
     - doPerf [True]
     - batchMethod [BatchSizeMethod.MinClassSize when 'batchSize' is absent
       or NotSet, BatchSizeMethod.Manual otherwise]
     - batchSize [NotSet]
     - epochs [10000]
     - maxFail [50]
     - useTstEfficiencyAsRef [False]
   Only when the core is ExMachina:
     - algorithmName ['rprop'], showEvo [True], networkArch ['feedforward'],
       costFunction ['sp'], shuffle [True]
   Only when the core is FastNet:
     - seed [None], algorithmName ['trainrp'], showEvo [50],
       doMultiStop [True]
   Keywords left unconsumed are reported through the logger debug method.
   """
   Logger.__init__( self, kw )
   self.references = ReferenceBenchmarkCollection( [] )
   self.doPerf                = retrieve_kw( kw, 'doPerf',                True                   )
   # The default batch method depends on whether a batch size was supplied, so
   # this must run before 'batchSize' itself is retrieved from kw below
   # (presumably retrieve_kw removes the key from kw -- TODO confirm).
   self.batchMethod           = BatchSizeMethod.retrieve(
                              retrieve_kw( kw, 'batchMethod', BatchSizeMethod.MinClassSize \
       if not 'batchSize' in kw or kw['batchSize'] is NotSet else BatchSizeMethod.Manual         ) 
                                )
   self.batchSize             = retrieve_kw( kw, 'batchSize',             NotSet                 )
   # epochs and maxFail are kept local: where they are stored depends on the core
   epochs                     = retrieve_kw( kw, 'epochs',                10000                  )
   maxFail                    = retrieve_kw( kw, 'maxFail',               50                     )
   self.useTstEfficiencyAsRef = retrieve_kw( kw, 'useTstEfficiencyAsRef', False                  )
   self._core, self._coreEnum = retrieve_core()
   self.sortIdx = None
   if self._coreEnum is TuningToolCores.ExMachina:
     # ExMachina: the training configuration is gathered in a plain dict
     self.trainOptions = dict()
     self.trainOptions['algorithmName'] = retrieve_kw( kw, 'algorithmName', 'rprop'       )
     self.trainOptions['print']         = retrieve_kw( kw, 'showEvo',       True          )
     self.trainOptions['networkArch']   = retrieve_kw( kw, 'networkArch',   'feedforward' )
     self.trainOptions['costFunction']  = retrieve_kw( kw, 'costFunction',  'sp'          )
     self.trainOptions['shuffle']       = retrieve_kw( kw, 'shuffle',       True          )
     self.trainOptions['nEpochs']       = epochs
     self.trainOptions['nFails']        = maxFail
     self.doMultiStop                   = False
   elif self._coreEnum is TuningToolCores.FastNet:
     # FastNet: self._core is replaced by the object returned from calling it,
     # and the training configuration is set as attributes on that object
     seed = retrieve_kw( kw, 'seed', None )
     self._core = self._core( level = LoggingLevel.toC(self.level), seed = seed )
     self._core.trainFcn    = retrieve_kw( kw, 'algorithmName', 'trainrp' )
     self._core.showEvo     = retrieve_kw( kw, 'showEvo',       50        )
     self._core.multiStop   = retrieve_kw( kw, 'doMultiStop',   True      )
     self._core.epochs      = epochs
     self._core.maxFail     = maxFail
   else:
     self._logger.fatal("TuningWrapper not implemented for %s" % TuningToolCores.tostring(self._coreEnum))
   checkForUnusedVars(kw, self._logger.debug )
   del kw
   # Set default empty values (their type depends on the core):
   if self._coreEnum is TuningToolCores.ExMachina:
     self._emptyData  = npCurrent.fp_array([])
   elif self._coreEnum is TuningToolCores.FastNet:
     self._emptyData = list()
   self._emptyHandler = None
   if self._coreEnum is TuningToolCores.ExMachina:
     self._emptyTarget = npCurrent.fp_array([[]]).reshape( 
             npCurrent.access( pidx=1,
                               oidx=0 ) )
   elif self._coreEnum is TuningToolCores.FastNet:
     self._emptyTarget = None
   # Set holders: train, validation and test all start from the empty values
   self._trnData    = self._emptyData
   self._valData    = self._emptyData
   self._tstData    = self._emptyData
   self._trnHandler = self._emptyHandler
   self._valHandler = self._emptyHandler
   self._tstHandler = self._emptyHandler
   self._trnTarget  = self._emptyTarget
   self._valTarget  = self._emptyTarget
   self._tstTarget  = self._emptyTarget
Example #5
0
 def __init__(self, filePath = None, **kw):
   """
   Give either the path of the file to be read, or the path together with the
   data that should be saved to it:

   with SubsetGeneratorArchieve("/path/to/file") as data:
     BLOCK

   SubsetGeneratorArchieve( "file/path", subsetCol= SubsetGeneratorPatternCollection([...]) )
   """
   Logger.__init__(self, kw)
   subset_collection = kw.pop('subsetCol', None)
   self._filePath = filePath
   self._subsetCol = subset_collection
   # Anything still in kw was not recognized: report it as a warning
   checkForUnusedVars(kw, self._warning)
Example #6
0
  def __init__(self, filePath = None, **kw):
    """
    Give either the path of the file to be read, or the path together with the
    data that should be saved to it:

    with CrosValidArchieve("/path/to/file") as data:
      BLOCK

    CrosValidArchieve( "file/path", crossValid = CrossValid() )
    """
    Logger.__init__(self, kw)
    cross_valid = kw.pop('crossValid', None)
    self._filePath = filePath
    self.crossValid = cross_valid
    # Anything still in kw was not recognized: report it as a warning
    checkForUnusedVars(kw, self._logger.warning)
Example #7
0
    def __init__(self, filePath=None, **kw):
        """
        Give either the path of the file to be read, or the path together with
        the data that should be saved to it:

        with CrosValidArchieve("/path/to/file") as data:
          BLOCK

        CrosValidArchieve( "file/path", crossValid = CrossValid() )
        """
        Logger.__init__(self, kw)
        cross_valid = kw.pop('crossValid', None)
        self._filePath = filePath
        self.crossValid = cross_valid
        # Anything still in kw was not recognized: report it as a warning
        checkForUnusedVars(kw, self._warning)
Example #8
0
  def __init__(self, filePath = None, **kw):
    """
    Give either the path of the file to be read, or the path together with the
    bounds that should be saved to it:

    with TuningJobConfigArchieve("/path/to/file") as data:
      BLOCK

    TuningJobConfigArchieve( "file/path", neuronBounds = ...,
                                          sortBounds = ...,
                                          initBounds = ... )
    """
    Logger.__init__(self, kw)
    neuron_bounds = kw.pop('neuronBounds', None)
    sort_bounds = kw.pop('sortBounds', None)
    init_bounds = kw.pop('initBounds', None)
    self._filePath = filePath
    # neuronBounds is kept as a public attribute, the other two as private ones
    self.neuronBounds = neuron_bounds
    self._sortBounds = sort_bounds
    self._initBounds = init_bounds
    # Anything still in kw was not recognized: report it as a warning
    checkForUnusedVars(kw, self._warning)
Example #9
0
    def __init__(self, net, **kw):
        """
        Build the Neural object from a network wrapper.

        Arguments:
          - net: network object; it must provide getNumLayers() and is handed
            to the private layer retrieval method
        Optional keyword arguments:
          - train [None]: when truthy, it is wrapped in a DataTrainEvolution
            and kept in self.dataTrain; otherwise self.dataTrain is None
        """
        Logger.__init__(self, kw)

        train_evolution = kw.pop("train", None)
        checkForUnusedVars(kw, self._logger.warning)
        del kw

        # Information taken from the network wrapper
        self.nNodes = []
        self.numberOfLayers = net.getNumLayers()

        # Training evolution information is only kept when it was supplied
        if train_evolution:
            self.dataTrain = DataTrainEvolution(train_evolution)
        else:
            self.dataTrain = None
        self.layers = self.__retrieve(net)

        self._logger.debug("The Neural object was created.")
  def __init__(self, filePath = None, **kw):
    """
    Give either the path of the file to be read, or the path together with the
    bounds that should be saved to it:

    with TuningJobConfigArchieve("/path/to/file") as data:
      BLOCK

    TuningJobConfigArchieve( "file/path", neuronBounds = ...,
                                          sortBounds = ...,
                                          initBounds = ... )
    """
    Logger.__init__(self, kw)
    neuron_bounds = kw.pop('neuronBounds', None)
    sort_bounds = kw.pop('sortBounds', None)
    init_bounds = kw.pop('initBounds', None)
    self._filePath = filePath
    # neuronBounds is kept as a public attribute, the other two as private ones
    self.neuronBounds = neuron_bounds
    self._sortBounds = sort_bounds
    self._initBounds = init_bounds
    # Anything still in kw was not recognized: report it as a warning
    checkForUnusedVars(kw, self._logger.warning)
Example #11
0
 def __init__(self, filename, **kw):
   """
   Open `filename` + '.tex' for writing and write the beamer document
   preamble, header and title page to it.

   Optional keyword arguments:
     - title ['Tuning Report']
     - institute ['Universidade Federal do Rio de Janeiro (UFRJ)']

   The file object is kept open in self._pfile.
   """
   Logger.__init__(self,kw)
   report_title = kw.pop('title', 'Tuning Report')
   report_institute = kw.pop('institute', 'Universidade Federal do Rio de Janeiro (UFRJ)')
   self._title = report_title
   self._institute = report_institute
   checkForUnusedVars( kw, self._logger.warning )
   # Host, user and creation time (UTC) of the report
   import socket
   import getpass
   from time import gmtime, strftime
   self._machine = socket.gethostname()
   self._author = getpass.getuser()
   self._data = strftime("%Y-%m-%d %H:%M:%S", gmtime())
   # Create the output file
   self._pfile = open(filename+'.tex','w')
   # Import the beamer constants
   from BeamerTemplates import BeamerConstants as bconst
   self._pfile.write( bconst.beginDocument )
   author_at_host = self._author+'$@$'+self._machine
   header_fields = (self._title, self._title, author_at_host, self._institute)
   self._pfile.write( bconst.beginHeader % header_fields )
   self._pfile.write( bconst.beginTitlePage )
Example #12
0
    def __call__(self, store, algname, **kwargs):
        """
        Extract the efficiency information of `algname` from `store`.

        Arguments:
          - store: object handed, together with the path, to the private
            efficiency retrieval method
          - algname: algorithm name, used as a component of the path
        Optional keyword arguments (default in brackets):
          - basepath ['HLT/Egamma/Expert']
          - dirname ['Efficiency']
          - inputname ['HLT/']
          - outputname ['HLT/match_']
          - level: when given, it is assigned to self.level

        Returns a dict with the keys 'eff_et', 'eff_eta', 'eff_mu',
        'eff_nvtx', 'eff', 'passed', 'total' and 'rawInfo' (the latter holds
        the names and paths used for the retrieval).

        Raises RuntimeError if the retrieval raises RuntimeError.
        """

        basepath = retrieve_kw(kwargs, 'basepath', 'HLT/Egamma/Expert')
        dirname = retrieve_kw(kwargs, 'dirname', 'Efficiency')
        input_name = retrieve_kw(kwargs, 'inputname', 'HLT/')
        output_name = retrieve_kw(kwargs, 'outputname', 'HLT/match_')

        # The keyword dictionary of this method is `kwargs`; popping from the
        # undefined name `kw` raised NameError whenever 'level' was passed.
        if 'level' in kwargs:
            self.level = kwargs.pop('level')
        checkForUnusedVars(kwargs, self._logger.warning)
        input_name = input_name.replace('//', '/')
        output_name = output_name.replace('//', '/')

        # some info to hold
        rawInfo = {'inputname': input_name,
                   'outputname': output_name,
                   'basepath': basepath,
                   'dirname': dirname,
                   'algname': algname}

        # This can be:
        # Egamma/algname/Efficiency/L1Calo/match_eta
        path = (basepath + '/' + algname + '/' + dirname).replace('//', '/')

        try:  # protection
            self._logger.debug(
                'Extracting efficiencies information for %s from %s', algname,
                path)
            obj, eff, passed, total = self.__retrieve_efficiencies(
                store, path, input_name, output_name)
        except RuntimeError:
            self._logger.error(
                ('Can not extract the efficiencies for this path %s') % (path))
            raise RuntimeError(
                'loop error in retrieve_efficiencies private method')

        # hold efficiencies values
        eobj = {'eff_et': obj['eff_et'],
                'eff_eta': obj['eff_eta'],
                'eff_mu': obj['eff_mu'],
                'eff_nvtx': obj['eff_nvtx'],
                'eff': eff,
                'passed': passed,
                'total': total,
                'rawInfo': rawInfo}
        self._logger.debug(('%s with efficiency: %1.2f  (%d/%d)') %
                           (algname, eff, passed, total))
        return eobj
Example #13
0
  def __init__(self, filename, **kw):
    """
    Open `filename` + '.tex' for writing and write the beamer document
    preamble, header and title page to it.

    Optional keyword arguments:
      - title ['Tuning Report']
      - institute ['Universidade Federal do Rio de Janeiro (UFRJ)']

    The file object is kept open in self._pfile.
    """
    Logger.__init__(self,kw)
    report_title = kw.pop('title', 'Tuning Report')
    report_institute = kw.pop('institute', 'Universidade Federal do Rio de Janeiro (UFRJ)')
    self._title = report_title
    self._institute = report_institute

    checkForUnusedVars( kw, self._logger.warning )

    # Host, user and creation time (UTC) of the report
    import socket
    import getpass
    from time import gmtime, strftime
    self._machine = socket.gethostname()
    self._author = getpass.getuser()
    self._data = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    # Create the output file
    self._pfile = open(filename+'.tex','w')

    # Import the beamer constants
    from BeamerTemplates import BeamerConstants as bconst
    self._pfile.write( bconst.beginDocument )
    author_at_host = self._author+'$@$'+self._machine
    header_fields = (self._title, self._title, author_at_host, self._institute)
    self._pfile.write( bconst.beginHeader % header_fields )

    self._pfile.write( bconst.beginTitlePage )
  def __call__(self, **kw):
    """
      Write one tuning job configuration file into the output folder for each
      combination of neuron, sort and initialization windows.

      Optional keyword arguments (default in brackets):
        - outputFolder ['jobConfig']: folder where the files are written
        - neuronBounds [SeqLoopingBounds(5, 20)]: wrapped in SeqLoopingBounds
          when it is not an instance of it
        - sortBounds [PythonLoopingBounds(50)]: wrapped in PythonLoopingBounds
          when it is not a SeqLoopingBounds instance
        - nInits [100]: must be at least 1
        - nNeuronsPerJob [1], nSortsPerJob [1], nInitsPerJob [100]: window
          sizes; the first two are reduced to the available amount when larger
        - compress [True]: forwarded to the archive save method
        - level: when given, it is assigned to self.level
    """

    # Looping configuration
    outputFolder  = retrieve_kw( kw, 'outputFolder', 'jobConfig' )
    neuronLoop    = retrieve_kw( kw, 'neuronBounds', SeqLoopingBounds(5, 20) )
    sortLoop      = retrieve_kw( kw, 'sortBounds', PythonLoopingBounds(50) )
    totalInits    = retrieve_kw( kw, 'nInits', 100 )
    # Job splitting configuration
    neuronsPerJob = retrieve_kw( kw, 'nNeuronsPerJob', 1 )
    sortsPerJob   = retrieve_kw( kw, 'nSortsPerJob', 1 )
    initsPerJob   = retrieve_kw( kw, 'nInitsPerJob', 100 )
    compress      = retrieve_kw( kw, 'compress', True )
    if 'level' in kw:
      self.level = kw.pop('level')
    # The bounds must be looping bounds objects:
    if not isinstance( neuronLoop, SeqLoopingBounds ):
      neuronLoop = SeqLoopingBounds( neuronLoop )
    if not isinstance( sortLoop, SeqLoopingBounds ):
      sortLoop = PythonLoopingBounds( sortLoop )
    # Report leftovers and drop the keyword dict to avoid mistakes:
    checkForUnusedVars( kw, self._logger.warning )
    del kw

    if nInitsIsInvalid(totalInits) if False else totalInits < 1:
      self._logger.fatal(("Cannot require zero or negative initialization "
          "number."), ValueError)

    # Check the arguments against the available amounts:
    totalNeurons = len(neuronLoop)
    totalSorts = len(sortLoop)
    if not totalSorts:
      self._logger.fatal("Sort bounds is empty.")
    if neuronsPerJob > totalNeurons:
      self._logger.warning(("The number of neurons per job (%d) is "
        "greater then the total number of neurons (%d), changing it "
        "into the maximum possible value."), neuronsPerJob, totalNeurons )
      neuronsPerJob = totalNeurons
    if sortsPerJob > totalSorts:
      self._logger.warning(("The number of sorts per job (%d) is "
        "greater then the total number of sorts (%d), changing it "
        "into the maximum possible value."), sortsPerJob, totalSorts )
      sortsPerJob = totalSorts

    # Create the output folder:
    mkdir_p(outputFolder)

    # Create the windows over which each job will loop:
    makeWindows = CreateTuningJobFiles._retrieveJobLoopingBoundsCol
    neuronWindows = makeWindows( neuronLoop, neuronsPerJob )
    sortWindows = makeWindows( sortLoop, sortsPerJob )
    initWindows = makeWindows( PythonLoopingBounds( totalInits ), initsPerJob )

    configMessage = ('Retrieved following job configuration '
                     '(bounds.vec) : '
                     '[ neuronBounds=%s, sortBounds=%s, initBounds=%s]')
    pathTemplate = '{outputFolder}/job.{neuronStr}.{sortStr}.{initStr}'

    # Loop over the windows and save one job configuration per combination
    for neuronWindow in neuronWindows():
      for sortWindow in sortWindows():
        for initWindow in initWindows():
          self._logger.debug( configMessage,
                              neuronWindow.formattedString('hn'),
                              sortWindow.formattedString('s'),
                              initWindow.formattedString('i') )
          jobPath = pathTemplate.format(
              outputFolder = outputFolder,
              neuronStr = neuronWindow.formattedString('hn'),
              sortStr = sortWindow.formattedString('s'),
              initStr = initWindow.formattedString('i') )
          jobArchive = TuningJobConfigArchieve( jobPath,
                                                neuronBounds = neuronWindow,
                                                sortBounds = sortWindow,
                                                initBounds = initWindow )
          savedFile = jobArchive.save( compress )
          self._logger.info('Saved job option configuration at path: %s',
                            savedFile )
Example #15
0
    def __init__(self, **kw):
        """
        Configure the cross-validation according to the chosen method.

        Optional keyword arguments (default in brackets):
          - method [CrossValidMethod.Standard]
        For CrossValidMethod.Standard:
          - nSorts [50]: number of distinct sorts to generate
          - nBoxes [10]: total number of boxes
          - nTrain [6], nValid [4]: boxes used for training and validation
          - nTest [nBoxes - (nTrain + nValid)]: boxes used for testing
          - seed [None]: handed to np.random.seed
        For CrossValidMethod.JackKnife:
          - nBoxes [10]: one box is used for validation, the others for training
        For CrossValidMethod.StratifiedKFold:
          - nBoxes [10]
          - shuffle [False]

        The generated sorts are kept in self._sort_boxes_list (Standard and
        JackKnife methods). Problems with the box numbers are reported through
        self._fatal with ValueError.
        """
        Logger.__init__(self, kw)
        printArgs(kw, self._debug)
        self._nSorts = None
        self._nBoxes = None
        self._nTrain = None
        self._nValid = None
        self._nTest = None
        self._seed = None
        self._method = CrossValidMethod.retrieve(
            retrieve_kw(kw, 'method', CrossValidMethod.Standard))

        if self._method is CrossValidMethod.Standard:
            self._nSorts = retrieve_kw(kw, 'nSorts', 50)
            self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
            self._nTrain = retrieve_kw(kw, 'nTrain', 6)
            self._nValid = retrieve_kw(kw, 'nValid', 4)
            self._nTest = retrieve_kw(
                kw, 'nTest', self._nBoxes - (self._nTrain + self._nValid))
            self._seed = retrieve_kw(kw, 'seed', None)
            checkForUnusedVars(kw, self._warning)
            # Check if variables are ok:
            if self._nTest is not None and self._nTest < 0:
                self._fatal("Number of test clusters is lesser than zero",
                            ValueError)
            # A test size of None is counted as "no test boxes" in the
            # computations below (factorial(None) would raise TypeError).
            nTest = self._nTest or 0
            totalSum = self._nTrain + self._nValid + nTest
            if totalSum != self._nBoxes:
                self._fatal(
                    "Sum of train, validation and test boxes doesn't match.",
                    ValueError)

            np.random.seed(self._seed)

            # Test whether the number of possible combinations
            # (N!/(train! valid! test!)) is greater than the required sorts.
            # If the number of sorts is close to the number of combinations
            # (greater than half of the possible cases), generate all possible
            # combinations and then discard the ones in excess. However, as
            # calculating the factorial can be heavy, we don't do this if the
            # number of boxes is large.
            self._sort_boxes_list = []
            useRandomCreation = True
            from math import factorial
            if self._nBoxes < 201:
                # Integer division: the result is an exact integer and must
                # stay an int (true division yields a float on Python 3).
                totalPossibilities = factorial(self._nBoxes) // (
                    factorial(self._nTrain) *
                    factorial(self._nValid) *
                    factorial(nTest))
                if self._nSorts > (totalPossibilities / 2):
                    useRandomCreation = False
            if useRandomCreation:
                # The set mirrors the list only to make the duplicate test
                # cheap; the list keeps the order in which sorts were drawn.
                seen = set()
                # Looping on the list size also terminates for nSorts <= 0.
                while len(self._sort_boxes_list) < self._nSorts:
                    random_boxes = np.random.permutation(self._nBoxes)
                    valid_end = self._nTrain + self._nValid
                    random_boxes = tuple(
                        chain(
                            sorted(random_boxes[0:self._nTrain]),
                            sorted(random_boxes[self._nTrain:valid_end]),
                            sorted(random_boxes[valid_end:])))
                    # Make sure we are not appending same sort again:
                    if random_boxes not in seen:
                        seen.add(random_boxes)
                        self._sort_boxes_list.append(random_boxes)
            else:
                self._sort_boxes_list = list(
                    combinations_taken_by_multiple_groups(
                        range(self._nBoxes),
                        (self._nTrain, self._nValid, nTest)))
                # Randomly discard the sorts in excess. The index is drawn
                # from the current list size with an exclusive upper bound:
                # an inclusive bound fixed at the initial size could point
                # past the end of the shrinking list.
                nExcess = len(self._sort_boxes_list) - self._nSorts
                for _ in range(nExcess):
                    self._sort_boxes_list.pop(
                        np.random.randint(0, len(self._sort_boxes_list)))
        elif self._method is CrossValidMethod.JackKnife:
            self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
            checkForUnusedVars(kw, self._warning)
            self._nSorts = self._nBoxes
            self._nTrain = self._nBoxes - 1
            self._nValid = 1
            self._nTest = 0
            # Group sizes follow nBoxes; the fixed sizes (9, 1) were only
            # right for the default of 10 boxes.
            self._sort_boxes_list = list(
                combinations_taken_by_multiple_groups(
                    range(self._nBoxes), (self._nTrain, self._nValid)))
        elif self._method is CrossValidMethod.StratifiedKFold:
            self._nBoxes = retrieve_kw(kw, 'nBoxes', 10)
            self._shuffle = retrieve_kw(kw, 'shuffle', False)
            checkForUnusedVars(kw, self._logger.warning)
            self._nSorts = self._nBoxes
            self._nTrain = self._nBoxes - 1
            self._nValid = 1
            self._nTest = 0
Example #16
0
  def __call__( self, fList, ringerOperation, **kw):
    """
      Read ntuple and return patterns and efficiencies.
      Arguments:
        - fList: The file path or file list path. It can be an argument list of
        two types:
          o List: each element is a string path to the file;
          o Comma separated string: each path is separated via a comma
          o Folders: Expand folders recursively adding also files within them to analysis
        - ringerOperation: Set Operation type. It can be both a string or the
          RingerOperation
      Optional arguments:
        - filterType [None]: whether to filter. Use FilterType enumeration
        - reference [Truth]: set reference for targets. Use Reference enumeration
        - treePath [Set using operation]: set tree name on file, this may be set to
          use different sources then the default.
            Default for:
              o Offline: Offline/Egamma/Ntuple/electron
              o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH
        - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger
        - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger
        - offEtCut [None]: Set Offline cluster energy cut value
        - nClusters [None]: Read up to nClusters. Use None to run for all clusters.
        - getRatesOnly [False]: Read up to nClusters. Use None to run for all clusters.
        - etBins [None]: E_T bins (GeV) where the data should be segmented
        - etaBins [None]: eta bins where the data should be segmented
        - ringConfig [100]: A list containing the number of rings available in the data
          for each eta bin.
        - crossVal [None]: Whether to measure benchmark efficiency splitting it
          by the crossVal-validation datasets
        - extractDet [None]: Which detector to export (use Detector enumeration).
          Defaults are:
            o L2Calo: Calorimetry
            o L2: Tracking
            o Offline: Calorimetry
            o Others: CaloAndTrack
        - standardCaloVariables [False]: Whether to extract standard track variables.
        - useTRT [False]: Whether to export TRT information when dumping track
          variables.
        - supportTriggers [True]: Whether reading data comes from support triggers
    """
    # Offline information branches:
    __offlineBranches = ['el_et',
                         'el_eta',
                         #'el_loose',
                         #'el_medium',
                         #'el_tight',
                         'el_lhLoose',
                         'el_lhMedium',
                         'el_lhTight',
                         'mc_hasMC',
                         'mc_isElectron',
                         'mc_hasZMother',
                         'el_nPileupPrimaryVtx',
                         ]
    # Online information branches
    __onlineBranches = []
    __l2stdCaloBranches = ['trig_L2_calo_et',
                           'trig_L2_calo_eta',
                           'trig_L2_calo_phi',
                           'trig_L2_calo_e237', # rEta
                           'trig_L2_calo_e277', # rEta
                           'trig_L2_calo_fracs1', # F1: fraction sample 1
                           'trig_L2_calo_weta2', # weta2
                           'trig_L2_calo_ehad1', # energy on hadronic sample 1
                           'trig_L2_calo_emaxs1', # eratio
                           'trig_L2_calo_e2tsts1', # eratio
                           'trig_L2_calo_wstot',] # wstot
    __l2trackBranches = [ # Do not add non patter variables on this branch list
                         #'trig_L2_el_pt',
                         #'trig_L2_el_eta',
                         #'trig_L2_el_phi',
                         #'trig_L2_el_caloEta',
                         #'trig_L2_el_charge',
                         #'trig_L2_el_nTRTHits',
                         #'trig_L2_el_nTRTHiThresholdHits',
                         'trig_L2_el_etOverPt',
                         'trig_L2_el_trkClusDeta',
                         'trig_L2_el_trkClusDphi',]
    # Retrieve information from keyword arguments
    filterType            = retrieve_kw(kw, 'filterType',            FilterType.DoNotFilter )
    reference             = retrieve_kw(kw, 'reference',             Reference.Truth        )
    l1EmClusCut           = retrieve_kw(kw, 'l1EmClusCut',           None                   )
    l2EtCut               = retrieve_kw(kw, 'l2EtCut',               None                   )
    efEtCut               = retrieve_kw(kw, 'efEtCut',               None                   )
    offEtCut              = retrieve_kw(kw, 'offEtCut',              None                   )
    treePath              = retrieve_kw(kw, 'treePath',              None                   )
    nClusters             = retrieve_kw(kw, 'nClusters',             None                   )
    getRates              = retrieve_kw(kw, 'getRates',              True                   )
    getRatesOnly          = retrieve_kw(kw, 'getRatesOnly',          False                  )
    etBins                = retrieve_kw(kw, 'etBins',                None                   )
    etaBins               = retrieve_kw(kw, 'etaBins',               None                   )
    crossVal              = retrieve_kw(kw, 'crossVal',              None                   )
    ringConfig            = retrieve_kw(kw, 'ringConfig',            100                    )
    extractDet            = retrieve_kw(kw, 'extractDet',            None                   )
    standardCaloVariables = retrieve_kw(kw, 'standardCaloVariables', False                  )
    useTRT                = retrieve_kw(kw, 'useTRT',                False                  )
    supportTriggers       = retrieve_kw(kw, 'supportTriggers',       True                   )
    monitoring            = retrieve_kw(kw, 'monitoring',            None                   )
    pileupRef             = retrieve_kw(kw, 'pileupRef',             NotSet                 )
    import ROOT, pkgutil
    #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
    #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
    if not( bool( pkgutil.find_loader( 'libTuningTools' ) ) and ROOT.gSystem.Load('libTuningTools') >= 0 ) and \
       not( bool( pkgutil.find_loader( 'libTuningToolsLib' ) ) and ROOT.gSystem.Load('libTuningToolsLib') >= 0 ):
        #ROOT.gSystem.Load('libTuningToolsPythonLib') < 0:
      self._fatal("Could not load TuningTools library", ImportError)

    if 'level' in kw: self.level = kw.pop('level')
    # and delete it to avoid mistakes:
    checkForUnusedVars( kw, self._warning )
    del kw
    ### Parse arguments
    # Mutual exclusive arguments:
    if not getRates and getRatesOnly:
      self._logger.error("Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True.")
      getRates = True
    # Also parse operation, check if its type is string and if we can
    # transform it to the known operation enum:
    fList = csvStr2List ( fList )
    fList = expandFolders( fList )
    ringerOperation = RingerOperation.retrieve(ringerOperation)
    reference = Reference.retrieve(reference)
    if isinstance(l1EmClusCut, str):
      l1EmClusCut = float(l1EmClusCut)
    if l1EmClusCut:
      l1EmClusCut = 1000.*l1EmClusCut # Put energy in MeV
      __onlineBranches.append( 'trig_L1_emClus'  )
    if l2EtCut:
      l2EtCut = 1000.*l2EtCut # Put energy in MeV
      __onlineBranches.append( 'trig_L2_calo_et' )
    if efEtCut:
      efEtCut = 1000.*efEtCut # Put energy in MeV
      __onlineBranches.append( 'trig_EF_calo_et' )
    if offEtCut:
      offEtCut = 1000.*offEtCut # Put energy in MeV
      __offlineBranches.append( 'el_et' )
    if not supportTriggers:
      __onlineBranches.append( 'trig_L1_accept' )
    # Check if treePath is None and try to set it automatically
    if treePath is None:
      treePath = 'Offline/Egamma/Ntuple/electron' if ringerOperation < 0 else \
                 'Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH'
    # Check whether using bins
    useBins=False; useEtBins=False; useEtaBins=False
    nEtaBins = 1; nEtBins = 1
    # Set the detector which we should extract the information:
    if extractDet is None:
      if ringerOperation < 0:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2Calo:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2:
        extractDet = Detector.Tracking
      else:
        extractDet = Detector.CaloAndTrack
    else:
      extractDet = Detector.retrieve( extractDet )

    if etaBins is None: etaBins = npCurrent.fp_array([])
    if type(etaBins) is list: etaBins=npCurrent.fp_array(etaBins)
    if etBins is None: etBins = npCurrent.fp_array([])
    if type(etBins) is list: etBins=npCurrent.fp_array(etBins)

    if etBins.size:
      etBins = etBins * 1000. # Put energy in MeV
      nEtBins  = len(etBins)-1
      if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of et bins (%d) is larger or equal than maximum '
            'integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      # Flag that we are separating data through bins
      useBins=True
      useEtBins=True
      self._debug('E_T bins enabled.')

    if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
      ringConfig = [ringConfig] * (len(etaBins) - 1) if etaBins.size else 1
    if type(ringConfig) is list: ringConfig=npCurrent.int_array(ringConfig)
    if not len(ringConfig):
      self._fatal('Rings size must be specified.');

    if etaBins.size:
      nEtaBins = len(etaBins)-1
      if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of eta bins (%d) is larger or equal than maximum '
            'integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtaBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      if len(ringConfig) != nEtaBins:
        self._fatal(('The number of rings configurations (%r) must be equal than '
                            'eta bins (%r) region config'),ringConfig, etaBins)
      useBins=True
      useEtaBins=True
      self._debug('eta bins enabled.')
    else:
      self._debug('eta/et bins disabled.')

    ### Prepare to loop:
    # Open root file
    t = ROOT.TChain(treePath)
    for inputFile in progressbar(fList, len(fList),
                                 logger = self._logger,
                                 prefix = "Creating collection tree "):

      # Check if file exists
      f  = ROOT.TFile.Open(inputFile, 'read')
      if not f or f.IsZombie():
        self._warning('Couldn''t open file: %s', inputFile)
        continue
      # Inform user whether TTree exists, and which options are available:
      self._debug("Adding file: %s", inputFile)
      obj = f.Get(treePath)
      if not obj:
        self._warning("Couldn't retrieve TTree (%s)!", treePath)
        self._info("File available info:")
        f.ReadAll()
        f.ReadKeys()
        f.ls()
        continue
      elif not isinstance(obj, ROOT.TTree):
        self._fatal("%s is not an instance of TTree!", treePath, ValueError)
      t.Add( inputFile )

    # Turn all branches off.
    t.SetBranchStatus("*", False)

    # RingerPhysVal hold the address of required branches
    event = ROOT.RingerPhysVal()

    # Add offline branches, these are always needed
    cPos = 0
    for var in __offlineBranches:
      self.__setBranchAddress(t,var,event)

    # Add online branches if using Trigger
    if ringerOperation > 0:
      for var in __onlineBranches:
        self.__setBranchAddress(t,var,event)


    ## Allocating memory for the number of entries
    entries = t.GetEntries()
    nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                else nClusters

    ## Retrieve the dependent operation variables:
    if useEtBins:
      etBranch = 'el_et' if ringerOperation < 0 else 'trig_L2_calo_et'
      self.__setBranchAddress(t,etBranch,event)
      self._debug("Added branch: %s", etBranch)
      if not getRatesOnly:
        npEt    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEt    with size %r", npEt.shape)

    if useEtaBins:
      etaBranch    = "el_eta" if ringerOperation < 0 else "trig_L2_calo_eta"
      self.__setBranchAddress(t,etaBranch,event)
      self._debug("Added branch: %s", etaBranch)
      if not getRatesOnly:
        npEta    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEta   with size %r", npEta.shape)

    # The base information holder, such as et, eta and pile-up
    if pileupRef is NotSet:
      if ringerOperation > 0:
        pileupRef = PileupReference.avgmu
      else:
        pileupRef = PileupReference.nvtx

    pileupRef = PileupReference.retrieve( pileupRef )

    self._info("Using '%s' as pile-up reference.", PileupReference.tostring( pileupRef ) )

    if pileupRef is PileupReference.nvtx:
      pileupBranch = 'el_nPileupPrimaryVtx'
      pileupDataType = np.uint16
    elif pileupRef is PileupReference.avgmu:
      pileupBranch = 'avgmu'
      pileupDataType = np.float32
    else:
      raise NotImplementedError("Pile-up reference %r is not implemented." % pileupRef)
    baseInfoBranch = BaseInfo((etBranch, etaBranch,  pileupBranch, 'el_phi' if ringerOperation < 0 else 'trig_L2_el_phi',),
                              (npCurrent.fp_dtype, npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType) )
    baseInfo = [None, ] * baseInfoBranch.nInfo

    # Make sure all baseInfoBranch information is available:
    for idx in baseInfoBranch:
      self.__setBranchAddress(t,baseInfoBranch.retrieveBranch(idx),event)

    # Allocate numpy to hold as many entries as possible:
    if not getRatesOnly:
      # Retrieve the rings information depending on ringer operation
      ringerBranch = "el_ringsE" if ringerOperation < 0 else \
                     "trig_L2_calo_rings"
      self.__setBranchAddress(t,ringerBranch,event)
      if ringerOperation > 0:
        if ringerOperation is RingerOperation.L2:
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
      if standardCaloVariables:
        if ringerOperation in (RingerOperation.L2, RingerOperation.L2Calo,):
          for var in __l2stdCaloBranches:
            self.__setBranchAddress(t, var, event)
        else:
          self._warning("Unknown standard calorimeters for Operation:%s. Setting operation back to use rings variables.",
                               RingerOperation.tostring(ringerOperation))
      t.GetEntry(0)
      npat = 0
      if extractDet in (Detector.Calorimetry,
                        Detector.CaloAndTrack,
                        Detector.All):
        if standardCaloVariables:
          npat+= 6
        else:
          npat += ringConfig.max()
      if extractDet in (Detector.Tracking,
                       Detector.CaloAndTrack,
                       Detector.All):
        if ringerOperation is RingerOperation.L2:
          if useTRT:
            self._info("Using TRT information!")
            npat += 2
            __l2trackBranches.append('trig_L2_el_nTRTHits')
            __l2trackBranches.append('trig_L2_el_nTRTHiThresholdHits')
          npat += 3
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
          self.__setBranchAddress(t,"trig_L2_el_pt",event)
        elif ringerOperation < 0: # Offline
          self._warning("Still need to implement tracking for the ringer offline.")
      npPatterns = npCurrent.fp_zeros( shape=npCurrent.shape(npat=npat, #getattr(event, ringerBranch).size()
                                                   nobs=nobs)
                                     )
      self._debug("Allocated npPatterns with size %r", npPatterns.shape)

      # Add E_T, eta and luminosity information
      npBaseInfo = [npCurrent.zeros( shape=npCurrent.shape(npat=1, nobs=nobs ), dtype=baseInfoBranch.dtype(idx) )
                                    for idx in baseInfoBranch]
    else:
      npPatterns = npCurrent.fp_array([])
      npBaseInfo = [deepcopy(npCurrent.fp_array([])) for _ in baseInfoBranch]

    ## Allocate the branch efficiency collectors:
    if getRates:
      if ringerOperation < 0:
        benchmarkDict = OrderedDict(
          [(  RingerOperation.Offline_CutBased_Loose  , 'el_loose'            ),
           (  RingerOperation.Offline_CutBased_Medium , 'el_medium'           ),
           (  RingerOperation.Offline_CutBased_Tight  , 'el_tight'            ),
           (  RingerOperation.Offline_LH_Loose        , 'el_lhLoose'          ),
           (  RingerOperation.Offline_LH_Medium       , 'el_lhMedium'         ),
           (  RingerOperation.Offline_LH_Tight        , 'el_lhTight'          ),
          ])
      else:
        benchmarkDict = OrderedDict(
          [( RingerOperation.L2Calo                  , 'trig_L2_calo_accept' ),
           ( RingerOperation.L2                      , 'trig_L2_el_accept'   ),
           ( RingerOperation.EFCalo                  , 'trig_EF_calo_accept' ),
           ( RingerOperation.HLT                     , 'trig_EF_el_accept'   ),
          ])


      from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
      branchEffCollectors = OrderedDict()
      branchCrossEffCollectors = OrderedDict()
      for key, val in benchmarkDict.iteritems():
        branchEffCollectors[key] = list()
        branchCrossEffCollectors[key] = list()
        # Add efficincy branch:
        if getRates or getRatesOnly:
          self.__setBranchAddress(t,val,event)
        for etBin in range(nEtBins):
          if useBins:
            branchEffCollectors[key].append(list())
            branchCrossEffCollectors[key].append(list())
          for etaBin in range(nEtaBins):
            etBinArg = etBin if useBins else -1
            etaBinArg = etaBin if useBins else -1
            argList = [ RingerOperation.tostring(key), val, etBinArg, etaBinArg ]
            branchEffCollectors[key][etBin].append(BranchEffCollector( *argList ) )
            if crossVal:
              branchCrossEffCollectors[key][etBin].append(BranchCrossEffCollector( entries, crossVal, *argList ) )
          # etBin
        # etaBin
      # benchmark dict
      if self._logger.isEnabledFor( LoggingLevel.DEBUG ):
        self._debug( 'Retrieved following branch efficiency collectors: %r',
            [collector[0].printName for collector in traverse(branchEffCollectors.values())])
    # end of (getRates)

    etaBin = 0; etBin = 0
    step = int(entries/100) if int(entries/100) > 0 else 1
    ## Start loop!
    self._info("There is available a total of %d entries.", entries)

    for entry in progressbar(range(entries), entries,
                             step = step, logger = self._logger,
                             prefix = "Looping over entries "):

      #self._verbose('Processing eventNumber: %d/%d', entry, entries)
      t.GetEntry(entry)

      # Check if it is needed to remove energy regions (this means that if not
      # within this range, it will be ignored as well for efficiency measuremnet)
      if event.el_et < offEtCut:
        self._verbose("Ignoring entry due to offline E_T cut.")
        continue
      # Add et distribution for all events

      if not monitoring is None:
        # Book all distribtions before the event selection
        self.__fillHistograms(monitoring,filterType,event,False)

      if ringerOperation > 0:
        # Remove events which didn't pass L1_calo
        if not supportTriggers and not event.trig_L1_accept:
          #self._verbose("Ignoring entry due to L1Calo cut (trig_L1_accept = %r).", event.trig_L1_accept)
          continue
        if event.trig_L1_emClus  < l1EmClusCut:
          #self._verbose("Ignoring entry due to L1Calo E_T cut (%d < %r).", event.trig_L1_emClus, l1EmClusCut)
          continue
        if event.trig_L2_calo_et < l2EtCut:
          #self._verbose("Ignoring entry due to L2Calo E_T cut.")
          continue
        if  efEtCut is not None and event.trig_L2_calo_accept :
          # EF calo is a container, search for electrons objects with et > cut
          trig_EF_calo_et_list = stdvector_to_list(event.trig_EF_calo_et)
          found=False
          for v in trig_EF_calo_et_list:
            if v < efEtCut:  found=True
          if found:
            #self._verbose("Ignoring entry due to EFCalo E_T cut.")
            continue

      # Set discriminator target:
      target = Target.Unknown
      if reference is Reference.Truth:
        if event.mc_isElectron and event.mc_hasZMother:
          target = Target.Signal
        elif not (event.mc_isElectron and (event.mc_hasZMother or event.mc_hasWMother) ):
          target = Target.Background
      elif reference is Reference.Off_Likelihood:
        if event.el_lhTight: target = Target.Signal
        elif not event.el_lhLoose: target = Target.Background
      elif reference is Reference.AcceptAll:
        target = Target.Signal if filterType is FilterType.Signal else Target.Background
      else:
        if event.el_tight: target = Target.Signal
        elif not event.el_loose: target = Target.Background

      # Run filter if it is defined
      if filterType and \
         ( (filterType is FilterType.Signal and target != Target.Signal) or \
           (filterType is FilterType.Background and target != Target.Background) or \
           (target == Target.Unknown) ):
        #self._verbose("Ignoring entry due to filter cut.")
        continue

      # Add et distribution for all events
      if not monitoring is None:
        # Book all distributions after the event selection
        self.__fillHistograms(monitoring,filterType,event,True)

      # Retrieve base information:
      for idx in baseInfoBranch:
        lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
        baseInfo[idx] = lInfo
        if not getRatesOnly: npBaseInfo[idx][cPos] = lInfo
      # Retrieve dependent operation region
      if useEtBins:
        etBin  = self.__retrieveBinIdx( etBins, baseInfo[0] )
      if useEtaBins:
        etaBin = self.__retrieveBinIdx( etaBins, np.fabs( baseInfo[1]) )


      # Check if bin is within range (when not using bins, this will always be true):
      if (etBin < nEtBins and etaBin < nEtaBins):
        # Retrieve patterns:
        if not getRatesOnly:
          if useEtBins:  npEt[cPos] = etBin
          if useEtaBins: npEta[cPos] = etaBin
          ## Retrieve calorimeter information:
          cPat = 0
          caloAvailable = True
          if extractDet in (Detector.Calorimetry,
                           Detector.CaloAndTrack,
                           Detector.All):
            if standardCaloVariables:
              patterns = []
              if ringerOperation is RingerOperation.L2Calo:
                from math import cosh
                cosh_eta = cosh( event.trig_L2_calo_eta )
                # second layer ratio between 3x7 7x7
                rEta = event.trig_L2_calo_e237 / event.trig_L2_calo_e277
                base = event.trig_L2_calo_emaxs1 + event.trig_L2_calo_e2tsts1
                # Ratio between first and second highest energy cells
                eRatio = ( event.trig_L2_calo_emaxs1 - event.trig_L2_calo_e2tsts1 ) / base if base > 0 else 0
                # ratio of energy in the first layer (hadronic particles should leave low energy)
                F1 = event.trig_L2_calo_fracs1 / ( event.trig_L2_calo_et * cosh_eta )
                # weta2 is calculated over the middle layer using 3 x 5
                weta2 = event.trig_L2_calo_weta2
                # wstot is calculated over the first layer using (typically) 20 strips
                wstot = event.trig_L2_calo_wstot
                # ratio between EM cluster and first hadronic layers:
                Rhad1 = ( event.trig_L2_calo_ehad1 / cosh_eta ) / event.trig_L2_calo_et
                # allocate patterns:
                patterns = [rEta, eRatio, F1, weta2, wstot, Rhad1]
                for pat in patterns:
                  npPatterns[npCurrent.access( pidx=cPat, oidx=cPos) ] = pat
                  cPat += 1
              # end of ringerOperation
            else:
              # Remove events without rings
              if getattr(event,ringerBranch).empty():
                caloAvailable = False
              # Retrieve rings:
              if caloAvailable:
                try:
                  patterns = stdvector_to_list( getattr(event,ringerBranch) )
                  lPat = len(patterns)
                  if lPat == ringConfig[etaBin]:
                    npPatterns[npCurrent.access(pidx=slice(cPat,ringConfig[etaBin]),oidx=cPos)] = patterns
                  else:
                    oldEtaBin = etaBin
                    if etaBin > 0 and ringConfig[etaBin - 1] == lPat:
                      etaBin -= 1
                    elif etaBin + 1 < len(ringConfig) and ringConfig[etaBin + 1] == lPat:
                      etaBin += 1
                    npPatterns[npCurrent.access(pidx=slice(cPat, ringConfig[etaBin]),oidx=cPos)] = patterns
                    self._warning(("Recovered event which should be within eta bin (%d: %r) "
                                          "but was found to be within eta bin (%d: %r). "
                                          "Its read eta value was of %f."),
                                          oldEtaBin, etaBins[oldEtaBin:oldEtaBin+2],
                                          etaBin, etaBins[etaBin:etaBin+2],
                                          np.fabs( getattr(event,etaBranch)))
                except ValueError:
                  self._logger.error(("Patterns size (%d) do not match expected "
                                    "value (%d). This event eta value is: %f, and ringConfig is %r."),
                                    lPat, ringConfig[etaBin], np.fabs( getattr(event,etaBranch)), ringConfig
                                    )
                  continue
              else:
                if extractDet is Detector.Calorimetry:
                  # Also display warning when extracting only calorimetry!
                  self._warning("Rings not available")
                  continue
                self._warning("Rings not available")
                continue
              cPat += ringConfig.max()
            # which calo variables
          # end of (extractDet needed calorimeter)
          # And track information:
          if extractDet in (Detector.Tracking,
                           Detector.CaloAndTrack,
                           Detector.All):
            if caloAvailable or extractDet is Detector.Tracking:
              if ringerOperation is RingerOperation.L2:
                # Retrieve nearest deta/dphi only, so we need to find each one is the nearest:
                if event.trig_L2_el_trkClusDeta.size():
                  clusDeta = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDeta ) )
                  clusDphi = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDphi ) )
                  bestTrackPos = int( np.argmin( clusDeta**2 + clusDphi**2 ) )
                  for var in __l2trackBranches:
                    npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = getattr(event, var)[bestTrackPos]
                    cPat += 1
                else:
                  #self._verbose("Ignoring entry due to track information not available.")
                  continue
                  #for var in __l2trackBranches:
                  #  npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = np.nan
                  #  cPat += 1
              elif ringerOperation < 0: # Offline
                pass
            # caloAvailable or only tracking
          # end of (extractDet needs tracking)
        # end of (getRatesOnly)

        ## Retrieve rates information:
        if getRates:
          for branch in branchEffCollectors.itervalues():
            if not useBins:
              branch.update(event)
            else:
              branch[etBin][etaBin].update(event)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.update(event)
              else:
                branchCross[etBin][etaBin].update(event)
        # end of (getRates)

        # We only increment if this cluster will be computed
        cPos += 1
      # end of (et/eta bins)

      # Limit the number of entries to nClusters if desired and possible:
      if not nClusters is None and cPos >= nClusters:
        break
    # for end

    ## Treat the rings information
    if not getRatesOnly:

      ## Remove not filled reserved memory space:
      if npPatterns.shape[npCurrent.odim] > cPos:
        npPatterns = np.delete( npPatterns, slice(cPos,None), axis = npCurrent.odim)

      ## Segment data over bins regions:
      # Also remove not filled reserved memory space:
      if useEtBins:
        npEt  = npCurrent.delete( npEt, slice(cPos,None))
      if useEtaBins:
        npEta = npCurrent.delete( npEta, slice(cPos,None))
      # Treat
      npObject = self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                  nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                  npPatterns, )
      data = [self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                                      nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                                      npData) for npData in npBaseInfo]
      npBaseInfo = npCurrent.array( data, dtype=np.object )
    else:
      npObject = npCurrent.array([], dtype=npCurrent.dtype)
    # not getRatesOnly

    if getRates:
      if crossVal:
        for etBin in range(nEtBins):
          for etaBin in range(nEtaBins):
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.finished()
              else:
                branchCross[etBin][etaBin].finished()

      # Print efficiency for each one for the efficiency branches analysed:
      for etBin in range(nEtBins) if useBins else range(1):
        for etaBin in range(nEtaBins) if useBins else range(1):
          for branch in branchEffCollectors.itervalues():
            lBranch = branch if not useBins else branch[etBin][etaBin]
            self._info('%s',lBranch)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              lBranchCross = branchCross if not useBins else branchCross[etBin][etaBin]
              lBranchCross.dump(self._debug, printSort = True,
                                 sortFcn = self._verbose)
          # for branch
        # for eta
      # for et
    # end of (getRates)

    outputs = []
    #if not getRatesOnly:
    outputs.extend((npObject, npBaseInfo))
    #if getRates:
    outputs.extend((branchEffCollectors, branchCrossEffCollectors))
    #outputs = tuple(outputs)
    return outputs
Example #17
0
  def getModels(self, summaryInfoList,  **kw):
    """
      Collect, from cross-validation summaries, the information describing
      each discriminator to be exported.

      Arguments:
        - summaryInfoList: a summary dictionary, a string with the path of a
          file holding one (read through `load`), or a list/tuple of those.
      Optional arguments:
        - refBenchCol [None]: reference benchmark name(s) to retrieve from each
          summary. The first summary key containing the name is used. When
          None, the keys of the first summary are used.
        - configCol [[]]: configuration(s) to retrieve for each reference. When
          a configuration is None, the 'infoOpBest' entry stored directly in
          the reference dictionary is used, otherwise the one stored within
          its 'config_XXX' entry (number zero-padded to three digits).
        - muBin [[-999,9999]]: copied as it is into every returned model.

      Returns a list of dictionaries, one for each reference/configuration
      pair, with the keys: 'discriminator', 'threshold', 'etBin', 'etaBin',
      'muBin', 'etBinIdx', 'etaBinIdx', 'removeOutputTansigTF',
      'useCaloRings', 'useShowerShape' and 'useTrack'.
    """
    refBenchCol         = kw.pop( 'refBenchCol',       None              )
    configCol           = kw.pop( 'configCol',         []                )
    muBin               = kw.pop( 'muBin',             [-999,9999]       )
    checkForUnusedVars( kw, self._logger.warning )

    # Treat the summaryInfoList
    if not isinstance( summaryInfoList, (list,tuple)):
      summaryInfoList = [ summaryInfoList ]
    summaryInfoList = list(traverse(summaryInfoList,simple_ret=True))
    nSummaries = len(summaryInfoList)
    if not nSummaries:
      self._logger.fatal("Summary dictionaries must be specified!")

    if refBenchCol is None:
      # Materialize the keys so that the list/tuple check below behaves the
      # same whether keys() returns a list or a view object.
      refBenchCol = list(summaryInfoList[0].keys())

    # Treat the reference benchmark list
    if not isinstance( refBenchCol, (list,tuple)):
      refBenchCol = [ refBenchCol ] * nSummaries

    if len(refBenchCol) == 1:
      refBenchCol = refBenchCol * nSummaries

    nRefs = len(list(traverse(refBenchCol,simple_ret=True)))

    # Make sure that the lists are the same size as the reference benchmark:
    nConfigs = len(list(traverse(configCol,simple_ret=True)))
    if nConfigs == 0:
      configCol = [None for _ in range(nRefs)]
    elif nConfigs == 1:
      configCol = configCol * nSummaries
    nConfigs = len(list(traverse(configCol,simple_ret=True)))

    if nConfigs != nRefs:
      self._logger.fatal("Summary size is not equal to the configuration list.", ValueError)

    if nRefs == nConfigs == nSummaries:
      # If user input data without using list on the configuration, put it as a list:
      for o, idx, parent, _, _ in traverse(configCol):
        parent[idx] = [o]
      for o, idx, parent, _, _ in traverse(refBenchCol):
        parent[idx] = [o]

    configCol   = list(traverse(configCol,max_depth_dist=1,simple_ret=True))
    refBenchCol = list(traverse(refBenchCol,max_depth_dist=1,simple_ret=True))
    nConfigs = len(configCol)
    nSummary = len(refBenchCol)

    # NOTE(review): this chained comparison only fails when nRefs != nConfigs
    # AND nConfigs != nSummary. It is kept as it was, since requiring the three
    # values to be equal would reject inputs with several references per
    # summary -- confirm the intended check.
    if nRefs != nConfigs != nSummary:
      self._logger.fatal("Number of references, configurations and summaries do not match!", ValueError)

    def _usedInputs( pp ):
      """
        Return the (useCaloRings, useTrack, useShowerShape) flags for the
        pre-processing object type, or None if the type is not one of the
        known normalizations.
      """
      ppType = type(pp)
      if ppType is Norm1:
        return (True, False, False)
      elif ppType is TrackSimpleNorm:
        return (False, True, False)
      elif ppType is ShowerShapesSimpleNorm:
        return (False, False, True)
      elif ppType is ExpertNetworksSimpleNorm:
        return (True, True, False)
      elif ppType is ExpertNetworksShowerShapeSimpleNorm:
        return (True, False, True)
      elif ppType is ExpertNetworksShowerShapeAndTrackSimpleNorm:
        return (True, True, True)
      return None

    from RingerCore import retrieveRawDict

    discrList = []

    for summaryInfo, refBenchmarkList, configList in zip(summaryInfoList,refBenchCol,configCol):

      if type(summaryInfo) is str:
        self._logger.info('Loading file "%s"...', summaryInfo)
        summaryInfo = load(summaryInfo)
      elif type(summaryInfo) is dict:
        pass
      else:
        self._logger.fatal("Cross-valid summary info is not string and not a dictionary.", ValueError)

      for refBenchmarkName, config in zip(refBenchmarkList, configList):

        # Use the first summary key which contains the reference name:
        candidates = [k for k in summaryInfo if refBenchmarkName in k]
        if not candidates:
          self._logger.fatal("Could not find reference %s in summaryInfo. Available options are: %r", refBenchmarkName, summaryInfo.keys())
        key = candidates[0]
        refDict = summaryInfo[ key ]

        self._logger.info("Using Reference key: %s", key )

        ppInfo    = summaryInfo['infoPPChain']
        etBinIdx  = refDict['etBinIdx']
        etaBinIdx = refDict['etaBinIdx']
        etBin     = refDict['etBin']
        etaBin    = refDict['etaBin']
        info      = refDict['infoOpBest'] if config is None else refDict['config_' + str(config).zfill(3)]['infoOpBest']

        # Check if user specified parameters for exporting discriminator
        # operation information:
        sort    = info['sort']
        init    = info['init']
        pyThres = info['cut']

        if isinstance( pyThres, float ):
          pyThres = RawThreshold( thres = pyThres
                                , etBinIdx = etBinIdx, etaBinIdx = etaBinIdx
                                , etBin = etBin, etaBin =  etaBin)
        else:
          # Get the object from the raw dict
          pyThres = retrieveRawDict( pyThres )

        # A threshold without bin information (None or empty string) inherits
        # the bin from the reference. The string is compared by value, as the
        # identity of a string literal is an implementation detail.
        if pyThres.etBin is None or \
           ( isinstance( pyThres.etBin, str ) and pyThres.etBin == '' ):
          pyThres.etBin = etBin
        elif isinstance( pyThres.etBin, (list,tuple)):
          pyThres.etBin = np.array( pyThres.etBin)
        if not(np.array_equal( pyThres.etBin, etBin )):
          self._logger.fatal("etBin does not match for threshold! Should be %r, is %r", pyThres.etBin, etBin )

        if pyThres.etaBin is None or \
           ( isinstance( pyThres.etaBin, str ) and pyThres.etaBin == '' ):
          pyThres.etaBin = etaBin
        elif isinstance( pyThres.etaBin, (list,tuple)):
          pyThres.etaBin = np.array( pyThres.etaBin)
        if not(np.array_equal( pyThres.etaBin, etaBin )):
          self._logger.fatal("etaBin does not match for threshold! Should be %r, is %r", pyThres.etaBin, etaBin )

        if type(pyThres) is RawThreshold:
          thresValues = [pyThres.thres]
        else:
          thresValues = [pyThres.slope, pyThres.intercept, pyThres.rawThres]

        # Only the first pre-processing of the chain used on this sort is
        # inspected to find out which inputs the discriminator uses.
        pyPreProc = ppInfo['sort_'+str(sort).zfill(3)]['items'][0]
        pyPreProc = retrieveRawDict( pyPreProc )

        useCaloRings=False; useTrack=False; useShowerShape=False

        inputs = _usedInputs( pyPreProc )
        if inputs is not None:
          useCaloRings, useTrack, useShowerShape = inputs
        elif type(pyPreProc) is PreProcMerge:
          # Accumulate the inputs used by each one of the merged slots. The
          # slot itself must be inspected: pyPreProc is the merge object and
          # never matches any of the normalization types.
          for slot in pyPreProc.slots:
            slotInputs = _usedInputs( slot )
            if slotInputs is not None:
              useCaloRings   = useCaloRings   or slotInputs[0]
              useTrack       = useTrack       or slotInputs[1]
              useShowerShape = useShowerShape or slotInputs[2]
        else:
          self._logger.fatal('PrepProc strategy not found...')

        model = {
                  'discriminator'        : info['discriminator'],
                  'threshold'            : thresValues,
                  'etBin'                : etBin,
                  'etaBin'               : etaBin,
                  'muBin'                : muBin,
                  'etBinIdx'             : etBinIdx,
                  'etaBinIdx'            : etaBinIdx,
                  'removeOutputTansigTF' : refDict.get('removeOutputTansigTF', None ),
                  'useCaloRings'         : useCaloRings,
                  'useShowerShape'       : useShowerShape,
                  'useTrack'             : useTrack,
                }

        discrList.append( model )

        self._logger.info('neuron = %d, sort = %d, init = %d',
                     info['neuron'],
                     sort,
                     init)

      # for benchmark
    # for summay in list

    return discrList
Example #18
0
  def __init__(self, **kw ):
    """
      Configure the cross-validation and create the list with the box
      indexes of every sort (self._sort_boxes_list).

      Optional arguments:
        - method [CrossValidMethod.Standard]: the cross-validation method,
          retrieved through CrossValidMethod.retrieve.
        When method is CrossValidMethod.Standard:
          - nSorts [50]: number of different sorts to create;
          - nBoxes [10]: number of boxes;
          - nTrain [6]: number of boxes on each sort training set;
          - nValid [4]: number of boxes on each sort validation set;
          - nTest [nBoxes - (nTrain + nValid)]: number of boxes on each sort
            test set;
          - seed [None]: value given to np.random.seed.
        When method is CrossValidMethod.JackKnife:
          - nBoxes [10]: number of boxes, which is also the number of sorts.
            Each sort uses nBoxes - 1 boxes for training and 1 for validation.

      The keywords are first handed to Logger.__init__. The ones left unused
      are reported through the logger warning method.
    """
    Logger.__init__( self, kw  )
    printArgs( kw, self._logger.debug  )
    self._nSorts = None
    self._nBoxes = None
    self._nTrain = None
    self._nValid = None
    self._nTest  = None
    self._seed   = None
    self._method = CrossValidMethod.retrieve( retrieve_kw( kw, 'method', CrossValidMethod.Standard ) )

    if self._method is CrossValidMethod.Standard:
      self._nSorts = retrieve_kw( kw, 'nSorts', 50 )
      self._nBoxes = retrieve_kw( kw, 'nBoxes', 10 )
      self._nTrain = retrieve_kw( kw, 'nTrain', 6  )
      self._nValid = retrieve_kw( kw, 'nValid', 4  )
      self._nTest  = retrieve_kw( kw, 'nTest',  self._nBoxes - ( self._nTrain + self._nValid ) )
      self._seed   = retrieve_kw( kw, 'seed',   None )
      checkForUnusedVars( kw, self._logger.warning )
      # Check if variables are ok:
      if (not self._nTest is None) and self._nTest < 0:
        self._logger.fatal("Number of test clusters is lesser than zero", ValueError)
      totalSum = self._nTrain + self._nValid + (self._nTest) if self._nTest else \
                 self._nTrain + self._nValid
      if totalSum != self._nBoxes:
        self._logger.fatal("Sum of train, validation and test boxes doesn't match.", ValueError)

      np.random.seed(self._seed)

      # Test number of possible combinations (N!/((N-K)!(K)!) is greater
      # than the required sorts. If number of sorts (greater than half of the
      # possible cases) is close to the number of combinations, generate all
      # possible combinations and then gather the number of needed sorts.
      # However, as calculating factorial can be heavy, we don't do this if the
      # number of boxes is large.
      self._sort_boxes_list = []
      useRandomCreation = True
      from math import factorial
      if self._nBoxes < 201:
        # Integer division keeps the (exact) result as an integer, no matter
        # how the interpreter treats the '/' operator.
        totalPossibilities = factorial( self._nBoxes ) // \
            ( factorial( self._nTrain ) * \
              factorial( self._nValid ) * \
              factorial( self._nTest  ) )
        # Same as nSorts > totalPossibilities / 2, using only integers:
        if 2 * self._nSorts > totalPossibilities:
          useRandomCreation = False
      if useRandomCreation:
        count = 0
        while True:
          random_boxes = np.random.permutation(self._nBoxes)
          random_boxes = tuple(chain(sorted(random_boxes[0:self._nTrain]),
                          sorted(random_boxes[self._nTrain:self._nTrain+self._nValid]),
                          sorted(random_boxes[self._nTrain+self._nValid:])))
          # Make sure we are not appending same sort again:
          if not random_boxes in self._sort_boxes_list:
            self._sort_boxes_list.append( random_boxes )
            count += 1
            if count == self._nSorts:
              break
      else:
        self._sort_boxes_list = list(
            combinations_taken_by_multiple_groups(range(self._nBoxes),
                                                  (self._nTrain,
                                                   self._nValid,
                                                   self._nTest)))
        # Randomly discard sorts until only the requested number is left. The
        # index is always drawn within the current list size (randint upper
        # limit is exclusive), so that pop never goes out of range.
        while len(self._sort_boxes_list) > max(self._nSorts, 0):
          self._sort_boxes_list.pop( np.random.randint( len(self._sort_boxes_list) ) )
    elif self._method is CrossValidMethod.JackKnife:
      self._nBoxes = retrieve_kw( kw, 'nBoxes', 10 )
      checkForUnusedVars( kw, self._logger.warning )
      self._nSorts = self._nBoxes
      self._nTrain = self._nBoxes - 1
      self._nValid = 1
      self._nTest  = 0
      # Group sizes follow the number of boxes requested (it is (9, 1) for the
      # default of 10 boxes):
      self._sort_boxes_list = list(
          combinations_taken_by_multiple_groups(range(self._nBoxes),
                                                (self._nTrain, self._nValid,)) )
Example #19
0
    def __call__(self, fList, ringerOperation, **kw):
        """
        Read a skimmed ntuple and return patterns, base information and
        (optionally) efficiency collectors.

        Arguments:
          - fList: The file path or file list path. It can be:
              o List: each element is a string path to a file;
              o Comma separated string: each path is separated by a comma;
              o Folders: expanded recursively, adding the files within them.
          - ringerOperation: operation type, either a string or a
            RingerOperation value. Negative values select the offline
            ('elCand') branches, positive values the online ('fcCand') ones.

        Optional keyword arguments:
          - filterType [FilterType.DoNotFilter]: whether to filter entries by
            target. Use the FilterType enumeration.
          - reference [Reference.AcceptAll]: reference used to set the targets.
            Use the Reference enumeration.
          - treePath ['ZeeCandidate']: name of the tree inside the files.
          - l2EtCut [None]: Fast Calo E_T cut, applied when operating online.
          - offEtCut [None]: offline E_T cut, given in GeV.
          - nClusters [None]: read up to nClusters. None runs on all entries.
          - getRates [True]: whether to fill and return efficiency collectors.
          - getRatesOnly [False]: only used here to force getRates to True.
          - getTagsOnly [False]: use the tag candidate (1) instead of the
            probe candidate (2).
          - etBins [None]: E_T bin edges (GeV) where data should be segmented.
          - etaBins [None]: eta bin edges where data should be segmented.
          - ringConfig [100]: number of rings available in the data, either a
            single value or one value per eta bin.
          - crossVal [None]: cross-validation object; when set, efficiencies
            are also measured per cross-validation dataset.
          - extractDet [None]: which detector to export (Detector
            enumeration). Must be Calorimetry, Tracking, CaloAndTrack or All;
            any other value (including the default) raises ValueError.
          - monitoring [None]: object forwarded to the histogram filling
            routine for every accepted entry.
          - pileupRef [NotSet]: pile-up reference. When not set, avgmu is used
            for online operation and nvtx otherwise.
          - level: logging level to set on this object.

        Returns:
          A list [npObject, npBaseInfo], extended with
          (branchEffCollectors, branchCrossEffCollectors) when getRates is
          true.

        Raises:
          ValueError: if extractDet is not one of the supported detectors.
          NotImplementedError: if the pile-up reference is not nvtx or avgmu.

        NOTE(review): earlier revisions of this docstring also listed
        l1EmClusCut, standardCaloVariables, useTRT and supportTriggers; this
        implementation does not read them from the keyword arguments.
        """
        # Imported once here instead of on every loop iteration.
        from math import log

        # Branch catalogues of the skimmed ntuple. Only __trackBranches,
        # __onlineBranches and __offlineBranches are used below; the others
        # are kept as a reference of what the ntuple provides.
        __eventBranches = [
            'EventNumber', 'RunNumber', 'RandomRunNumber', 'MCChannelNumber',
            'RandomLumiBlockNumber', 'MCPileupWeight', 'VertexZPosition',
            'Zcand_M', 'Zcand_pt', 'Zcand_eta', 'Zcand_phi', 'Zcand_y',
            'isTagTag'
        ]

        __trackBranches = [
            'elCand2_deltaeta1', 'elCand2_DeltaPOverP',
            'elCand2_deltaphiRescaled', 'elCand2_d0significance',
            'elCand2_trackd0pvunbiased', 'elCand2_eProbabilityHT'
        ]

        __monteCarloBranches = [
            'type',
            'origin',
            'originbkg',
            'typebkg',
            'isTruthElectronFromZ',
            'TruthParticlePdgId',
            'firstEgMotherPdgId',
            'TruthParticleBarcode',
            'firstEgMotherBarcode',
            'MotherPdgId',
            'MotherBarcode',
            'FirstEgMotherTyp',
            'FirstEgMotherOrigin',
            'dRPdgId',
        ]

        __onlineBranches = ['match', 'ringerMatch', 'ringer_rings']

        __offlineBranches = ['et', 'eta']

        # The current pid map used as offline reference
        pidConfigs = {
            key: value
            for key, value in RingerOperation.efficiencyBranches().iteritems()
            if key in (RingerOperation.Offline_LH_Tight,
                       RingerOperation.Offline_LH_Medium,
                       RingerOperation.Offline_LH_Loose,
                       RingerOperation.Offline_LH_VeryLoose)
        }

        # Retrieve information from keyword arguments
        filterType = retrieve_kw(kw, 'filterType', FilterType.DoNotFilter)
        reference = retrieve_kw(kw, 'reference', Reference.AcceptAll)
        offEtCut = retrieve_kw(kw, 'offEtCut', None)
        l2EtCut = retrieve_kw(kw, 'l2EtCut', None)
        treePath = retrieve_kw(kw, 'treePath', 'ZeeCandidate')
        nClusters = retrieve_kw(kw, 'nClusters', None)
        etBins = retrieve_kw(kw, 'etBins', None)
        etaBins = retrieve_kw(kw, 'etaBins', None)
        crossVal = retrieve_kw(kw, 'crossVal', None)
        ringConfig = retrieve_kw(kw, 'ringConfig', 100)
        monitoring = retrieve_kw(kw, 'monitoring', None)
        pileupRef = retrieve_kw(kw, 'pileupRef', NotSet)
        getRates = retrieve_kw(kw, 'getRates', True)
        getRatesOnly = retrieve_kw(kw, 'getRatesOnly', False)
        getTagsOnly = retrieve_kw(kw, 'getTagsOnly', False)
        extractDet = retrieve_kw(kw, 'extractDet', None)

        import ROOT
        if ROOT.gSystem.Load('libTuningTools') < 0:
            self._fatal("Could not load TuningTools library", ImportError)

        if 'level' in kw: self.level = kw.pop('level')
        # Report leftover keywords and drop the dict to avoid mistakes:
        checkForUnusedVars(kw, self._warning)
        del kw

        ### Parse arguments
        # Also parse operation, check if its type is string and if we can
        # transform it to the known operation enum:
        fList = csvStr2List(fList)
        fList = expandFolders(fList)
        ringerOperation = RingerOperation.retrieve(ringerOperation)
        reference = Reference.retrieve(reference)

        # Offline E_T cut
        # NOTE(review): l2EtCut is compared as given, without this GeV -> MeV
        # conversion -- confirm that this asymmetry is intended.
        if offEtCut:
            offEtCut = 1000. * offEtCut  # Put energy in MeV

        # Check whether using bins
        useBins = False
        useEtBins = False
        useEtaBins = False
        nEtaBins = 1
        nEtBins = 1

        if etaBins is None: etaBins = npCurrent.fp_array([])
        if type(etaBins) is list: etaBins = npCurrent.fp_array(etaBins)
        if etBins is None: etBins = npCurrent.fp_array([])
        if type(etBins) is list: etBins = npCurrent.fp_array(etBins)

        if etBins.size:
            etBins = etBins * 1000.  # Put energy in MeV
            nEtBins = len(etBins) - 1
            if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of et bins (%d) is larger or equal than maximum '
                    'integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            # Flag that we are separating data through bins
            useBins = True
            useEtBins = True
            self._debug('E_T bins enabled.')

        if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
            # Broadcast a scalar configuration to one entry per eta bin, or to
            # a single entry when eta bins are disabled. The conditional must
            # apply to the repetition count only: applied to the whole
            # expression it would replace the configuration by the int 1.
            nRingConfigs = (len(etaBins) - 1) if etaBins.size else 1
            ringConfig = [ringConfig] * nRingConfigs
        if type(ringConfig) is list:
            ringConfig = npCurrent.int_array(ringConfig)
        if not len(ringConfig):
            self._fatal('Rings size must be specified.')

        if etaBins.size:
            nEtaBins = len(etaBins) - 1
            if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of eta bins (%d) is larger or equal than maximum '
                    'integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtaBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            if len(ringConfig) != nEtaBins:
                self._fatal((
                    'The number of rings configurations (%r) must be equal than '
                    'eta bins (%r) region config'), ringConfig, etaBins)
            useBins = True
            useEtaBins = True
            self._debug('eta bins enabled.')

        # Only report bins as disabled when neither E_T nor eta bins are used.
        if not useBins:
            self._debug('eta/et bins disabled.')

        # The base information holder, such as et, eta and pile-up
        if pileupRef is NotSet:
            if ringerOperation > 0:
                pileupRef = PileupReference.avgmu
            else:
                pileupRef = PileupReference.nvtx

        pileupRef = PileupReference.retrieve(pileupRef)
        self._info("Using '%s' as pile-up reference.",
                   PileupReference.tostring(pileupRef))

        # Candidates: (1) is tags and (2) is probes. Default is probes
        # NOTE(review): the track branches and the E_T cuts below are
        # hard-coded to candidate 2 -- confirm behaviour with getTagsOnly.
        self._candIdx = 1 if getTagsOnly else 2

        # Mutual exclusive arguments:
        if not getRates and getRatesOnly:
            self._logger.error(
                "Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True."
            )
            getRates = True

        ### Prepare to loop:
        t = ROOT.TChain(treePath)
        for inputFile in progressbar(fList,
                                     len(fList),
                                     logger=self._logger,
                                     prefix="Creating collection tree "):
            # Check if file exists
            f = ROOT.TFile.Open(inputFile, 'read')
            if not f or f.IsZombie():
                self._warning("Couldn't open file: %s", inputFile)
                continue
            # Inform user whether TTree exists, and which options are available:
            self._debug("Adding file: %s", inputFile)
            obj = f.Get(treePath)
            if not obj:
                self._warning("Couldn't retrieve TTree (%s)!", treePath)
                self._info("File available info:")
                f.ReadAll()
                f.ReadKeys()
                f.ls()
                f.Close()
                continue
            elif not isinstance(obj, ROOT.TTree):
                self._fatal("%s is not an instance of TTree!", treePath,
                            ValueError)
            # The file was opened only to validate its content; the chain is
            # given the file name below, so release the handle here.
            f.Close()
            t.Add(inputFile)
        # Turn all branches off.
        t.SetBranchStatus("*", False)
        # The event object holds the address of the required branches
        event = ROOT.SkimmedNtuple()
        # Ready to retrieve the total number of events
        t.GetEntry(0)
        ## Allocating memory for the number of entries
        entries = t.GetEntries()
        nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                    else nClusters

        ## Retrieve the dependent operation variables.
        # The E_T and eta branches are always read as base information (and
        # the online E_T also feeds the Fast Calo cut), so their names and
        # addresses are set whether or not the data is segmented in bins.
        candPrefix = 'elCand%d' if ringerOperation < 0 else 'fcCand%d'
        etBranch = (candPrefix + '_et') % (self._candIdx)
        self.__setBranchAddress(t, etBranch, event)
        self._debug("Added branch: %s", etBranch)
        npEt = None
        if useEtBins:
            npEt = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEt    with size %r", npEt.shape)

        etaBranch = (candPrefix + '_eta') % (self._candIdx)
        self.__setBranchAddress(t, etaBranch, event)
        self._debug("Added branch: %s", etaBranch)
        npEta = None
        if useEtaBins:
            npEta = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEta   with size %r", npEta.shape)

        if reference is Reference.Truth:
            self.__setBranchAddress(t, ('elCand%d_isTruthElectronFromZ') %
                                    (self._candIdx), event)

        for var in __offlineBranches:
            self.__setBranchAddress(t, ('elCand%d_%s') % (self._candIdx, var),
                                    event)

        for var in __trackBranches:
            self.__setBranchAddress(t, var, event)

        # Add online branches if using Trigger
        if ringerOperation > 0:
            for var in __onlineBranches:
                self.__setBranchAddress(t,
                                        ('fcCand%d_%s') % (self._candIdx, var),
                                        event)
        else:
            self.__setBranchAddress(t, ('elCand%d_%s') %
                                    (self._candIdx, 'ringer_rings'), event)

        if pileupRef is PileupReference.nvtx:
            pileupBranch = 'Nvtx'
            pileupDataType = np.uint16
        elif pileupRef is PileupReference.avgmu:
            pileupBranch = 'averageIntPerXing'
            pileupDataType = np.float32
        else:
            raise NotImplementedError(
                "Pile-up reference %r is not implemented." % pileupRef)

        for var in [pileupBranch]:
            self.__setBranchAddress(t, var, event)

        ### Allocate memory
        if extractDet == (Detector.Calorimetry):
            npat = ringConfig.max()
        elif extractDet == (Detector.Tracking):
            npat = len(__trackBranches)
        # NOTE: Check if pat is correct for both Calo and track data
        elif extractDet in (Detector.CaloAndTrack, Detector.All):
            npat = ringConfig.max() + len(__trackBranches)
        else:
            # Without this branch the allocation below would fail with an
            # unbound 'npat'; fail with an explicit message instead.
            raise ValueError(
                "extractDet must be one of Detector.Calorimetry, "
                "Detector.Tracking, Detector.CaloAndTrack or Detector.All "
                "(got %r)." % (extractDet, ))

        npPatterns = npCurrent.fp_zeros(shape=npCurrent.shape(
            npat=npat,
            nobs=nobs))
        self._debug("Allocated npPatterns with size %r", npPatterns.shape)

        baseInfoBranch = BaseInfo(
            (etBranch, etaBranch, pileupBranch),
            (npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType))

        baseInfo = [
            None,
        ] * baseInfoBranch.nInfo
        # Add E_T, eta and luminosity information
        # NOTE(review): these arrays are allocated and returned, but nothing
        # in this method writes the per-entry values into them -- confirm
        # whether an assignment inside the entry loop is missing.
        npBaseInfo = [
            npCurrent.zeros(shape=npCurrent.shape(npat=1, nobs=nobs),
                            dtype=baseInfoBranch.dtype(idx))
            for idx in baseInfoBranch
        ]

        from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
        branchEffCollectors = OrderedDict()
        branchCrossEffCollectors = OrderedDict()

        if ringerOperation < 0:
            from operator import itemgetter
            benchmarkDict = OrderedDict(
                sorted([(key, value) for key, value in
                        RingerOperation.efficiencyBranches().iteritems()
                        if key < 0 and not (isinstance(value, (list, tuple)))],
                       key=itemgetter(0)))
        else:
            benchmarkDict = OrderedDict()

        for key, val in benchmarkDict.iteritems():
            branchEffCollectors[key] = list()
            branchCrossEffCollectors[key] = list()
            # Add efficiency branch:
            if ringerOperation < 0:
                self.__setBranchAddress(t, val, event)

            for etBin in range(nEtBins):
                if useBins:
                    branchEffCollectors[key].append(list())
                    branchCrossEffCollectors[key].append(list())
                for etaBin in range(nEtaBins):
                    etBinArg = etBin if useBins else -1
                    etaBinArg = etaBin if useBins else -1
                    argList = [
                        RingerOperation.tostring(key), val, etBinArg, etaBinArg
                    ]
                    if useBins:
                        # Binned layout: collectors[key][etBin][etaBin]
                        branchEffCollectors[key][etBin].append(
                            BranchEffCollector(*argList))
                        if crossVal:
                            branchCrossEffCollectors[key][etBin].append(
                                BranchCrossEffCollector(
                                    entries, crossVal, *argList))
                    else:
                        # Un-binned layout: the consumers below call
                        # update()/finished() directly on collectors[key], so
                        # store the collector itself rather than indexing into
                        # an empty list.
                        branchEffCollectors[key] = BranchEffCollector(
                            *argList)
                        if crossVal:
                            branchCrossEffCollectors[
                                key] = BranchCrossEffCollector(
                                    entries, crossVal, *argList)
                # etaBin
            # etBin
        # benchmark dict

        if self._logger.isEnabledFor(LoggingLevel.DEBUG):
            self._debug(
                'Retrieved following branch efficiency collectors: %r', [
                    collector[0].printName
                    for collector in traverse(branchEffCollectors.values())
                ])

        # Reset the bin indexes (the loops above reuse these names); these are
        # the values used for every entry when bins are disabled.
        etaBin = 0
        etBin = 0
        step = int(entries / 100) if int(entries / 100) > 0 else 1

        ## Start loop!
        self._info("There is available a total of %d entries.", entries)
        cPos = 0  # position of the next accepted entry in the output arrays

        ### Loop over entries
        for entry in progressbar(range(entries),
                                 entries,
                                 step=step,
                                 logger=self._logger,
                                 prefix="Looping over entries "):

            self._verbose('Processing eventNumber: %d/%d', entry, entries)
            t.GetEntry(entry)

            # Cuts are skipped when no threshold was given.
            if offEtCut is not None and event.elCand2_et < offEtCut:
                self._debug(
                    "Ignoring entry due to offline E_T cut. E_T = %1.3f < %1.3f MeV",
                    event.elCand2_et, offEtCut)
                continue

            if ringerOperation > 0:
                if l2EtCut is not None and event.fcCand2_et < l2EtCut:
                    self._debug("Ignoring entry due Fast Calo E_T cut.")
                    continue

            # Set discriminator target:
            target = Target.Unknown
            # Monte Carlo cuts
            if reference is Reference.Truth:
                isTruthElectron = getattr(
                    event,
                    ('elCand%d_isTruthElectronFromZ') % (self._candIdx))
                target = Target.Signal if isTruthElectron else Target.Background
            # Offline Likelihood cuts
            elif reference is Reference.Off_Likelihood:
                if getattr(event,
                           pidConfigs[RingerOperation.Offline_LH_Tight]):
                    target = Target.Signal
                elif not getattr(
                        event,
                        pidConfigs[RingerOperation.Offline_LH_VeryLoose]):
                    target = Target.Background
            # By pass everything (Default)
            elif reference is Reference.AcceptAll:
                target = Target.Signal if filterType is FilterType.Signal else Target.Background

            # Run filter if it is defined
            if filterType and \
               ( (filterType is FilterType.Signal and target != Target.Signal) or \
                 (filterType is FilterType.Background and target != Target.Background) or \
                 (target == Target.Unknown) ):
                continue

            ## Retrieve base information and rings:
            for idx in baseInfoBranch:
                lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
                baseInfo[idx] = lInfo
            # Retrieve dependent operation region
            if useEtBins:
                etBin = self.__retrieveBinIdx(etBins, baseInfo[0])
            if useEtaBins:
                etaBin = self.__retrieveBinIdx(etaBins, np.fabs(baseInfo[1]))

            # Check if bin is within range (when not using bins, this will always be true):
            if (etBin < nEtBins and etaBin < nEtaBins):

                if useEtBins: npEt[cPos] = etBin
                if useEtaBins: npEta[cPos] = etaBin
                # Online operation
                cPat = 0  # next free pattern row for this entry
                caloAvailable = True
                if ringerOperation > 0 and self.__get_ringer_onMatch(
                        event) < 1:
                    continue
                # TODO Treat case where we don't use rings energy
                # Check if the rings empty
                rings = self.__get_rings_energy(event, ringerOperation)
                if rings.empty():
                    self._debug(
                        'No rings available in this event. Skipping...')
                    caloAvailable = False

                # Retrieve rings:
                if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack,
                                  Detector.All):
                    if caloAvailable:
                        lPat = 0  # keeps the error message below well defined
                        try:
                            patterns = stdvector_to_list(rings)
                            lPat = len(patterns)
                            if lPat == ringConfig[etaBin]:
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                            else:
                                # The number of rings does not match this eta
                                # bin: try the neighbouring bins before
                                # giving up on the entry.
                                oldEtaBin = etaBin
                                if etaBin > 0 and ringConfig[etaBin -
                                                             1] == lPat:
                                    etaBin -= 1
                                elif etaBin + 1 < len(
                                        ringConfig) and ringConfig[etaBin +
                                                                   1] == lPat:
                                    etaBin += 1
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                                self._warning((
                                    "Recovered event which should be within eta bin (%d: %r) "
                                    "but was found to be within eta bin (%d: %r). "
                                    "Its read eta value was of %f."),
                                              oldEtaBin,
                                              etaBins[oldEtaBin:oldEtaBin + 2],
                                              etaBin,
                                              etaBins[etaBin:etaBin + 2],
                                              np.fabs(getattr(
                                                  event, etaBranch)))
                        except ValueError:
                            self._logger.error((
                                "Patterns size (%d) do not match expected "
                                "value (%d). This event eta value is: %f, and ringConfig is %r."
                            ), lPat, ringConfig[etaBin],
                                               np.fabs(
                                                   getattr(event, etaBranch)),
                                               ringConfig)
                            continue
                        cPat += ringConfig[etaBin]
                    else:
                        # Also display warning when extracting only calorimetry!
                        self._warning("Rings not available")
                        continue

                if extractDet in (Detector.Tracking, Detector.CaloAndTrack,
                                  Detector.All):
                    for var in __trackBranches:
                        npPatterns[npCurrent.access(pidx=cPat,
                                                    oidx=cPos)] = getattr(
                                                        event, var)
                        if var == 'elCand2_eProbabilityHT':
                            # Replace the stored value p by
                            # -(1/tau) * log(1/p - 1), clamping p away from 0
                            # and 1 so the logarithm stays finite.
                            TRT_PID = npPatterns[npCurrent.access(pidx=cPat,
                                                                  oidx=cPos)]
                            epsilon = 1e-99
                            if TRT_PID >= 1.0: TRT_PID = 1.0 - 1.e-15
                            elif TRT_PID <= 0.0: TRT_PID = epsilon
                            tau = 15.0
                            TRT_PID = -(1 / tau) * log((1.0 / TRT_PID) - 1.0)
                            npPatterns[npCurrent.access(pidx=cPat,
                                                        oidx=cPos)] = TRT_PID
                        cPat += 1

                ## Retrieve rates information:
                if getRates and ringerOperation < 0:
                    # Overwrite the isEM words with a boolean that is true
                    # when none of the masked bits is set.
                    event.elCand2_isEMLoose2015 = not (
                        event.elCand2_isEMLoose2015 & 34896)
                    event.elCand2_isEMMedium2015 = not (
                        event.elCand2_isEMMedium2015 & 276858960)
                    event.elCand2_isEMTight2015 = not (
                        event.elCand2_isEMTight2015 & 281053264)

                    for branch in branchEffCollectors.itervalues():
                        if not useBins:
                            branch.update(event)
                        else:
                            branch[etBin][etaBin].update(event)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            if not useBins:
                                branchCross.update(event)
                            else:
                                branchCross[etBin][etaBin].update(event)
                # end of (getRates)

                if not monitoring is None:
                    self.__fillHistograms(monitoring, filterType, pileupRef,
                                          pidConfigs, event)

                # We only increment if this cluster will be computed
                cPos += 1
            # end of (et/eta bins)

            # Limit the number of entries to nClusters if desired and possible:
            if not nClusters is None and cPos >= nClusters:
                break
        # for end

        ## Treat the rings information
        ## Remove not filled reserved memory space:
        if npPatterns.shape[npCurrent.odim] > cPos:
            npPatterns = np.delete(npPatterns,
                                   slice(cPos, None),
                                   axis=npCurrent.odim)

        ## Segment data over bins regions:
        # Also remove not filled reserved memory space:
        if useEtBins:
            npEt = npCurrent.delete(npEt, slice(cPos, None))
        if useEtaBins:
            npEta = npCurrent.delete(npEta, slice(cPos, None))

        # Treat
        # NOTE(review): npEt/npEta are None when the respective bins are
        # disabled; presumably treatNpInfo ignores them in that case, guided
        # by the useEtBins/useEtaBins flags -- confirm.
        standardCaloVariables = False
        npObject = self.treatNpInfo(
            cPos,
            npEt,
            npEta,
            useEtBins,
            useEtaBins,
            nEtBins,
            nEtaBins,
            standardCaloVariables,
            ringConfig,
            npPatterns,
        )

        data = [
            self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins,
                             nEtaBins, standardCaloVariables, ringConfig,
                             npData) for npData in npBaseInfo
        ]
        # The builtin 'object' is what the removed 'np.object' alias meant.
        npBaseInfo = npCurrent.array(data, dtype=object)

        if getRates:
            if crossVal:
                for etBin in range(nEtBins):
                    for etaBin in range(nEtaBins):
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            if not useBins:
                                branchCross.finished()
                            else:
                                branchCross[etBin][etaBin].finished()

            # Print efficiency for each one for the efficiency branches analysed:
            for etBin in range(nEtBins) if useBins else range(1):
                for etaBin in range(nEtaBins) if useBins else range(1):
                    for branch in branchEffCollectors.itervalues():
                        lBranch = branch if not useBins else branch[etBin][
                            etaBin]
                        self._info('%s', lBranch)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            lBranchCross = branchCross if not useBins else branchCross[
                                etBin][etaBin]
                            lBranchCross.dump(self._debug,
                                              printSort=True,
                                              sortFcn=self._verbose)
                    # for branch
                # for eta
            # for et
        else:
            branchEffCollectors = None
            branchCrossEffCollectors = None
        # end of (getRates)

        outputs = []
        outputs.extend((npObject, npBaseInfo))
        if getRates:
            outputs.extend((branchEffCollectors, branchCrossEffCollectors))

        return outputs
Example #20
0
 def __init__(self, **kw):
     """
     Configure the tuning wrapper for the currently selected core framework.

     Keyword arguments (all optional):
       - doPerf [True]: stored as self.doPerf.
       - batchMethod: batch size method; defaults to
         BatchSizeMethod.MinClassSize when batchSize is not given (or is
         NotSet) and to BatchSizeMethod.Manual otherwise.
       - batchSize [NotSet]: stored as self.batchSize.
       - epochs [10000]: maximum number of training epochs.
       - maxFail [50]: number of failures tolerated before stopping.
       - useTstEfficiencyAsRef [False]: stored as self.useTstEfficiencyAsRef.
       - merged [False]: stored as self._merged.
       - networks [NotSet]: stored as self.networks.
       - doMultiStop [True]: multi-stop flag.
       - showEvo [50]: display period of the training evolution.
       FastNet core only:
       - seed [None]: seed handed to the core.
       - algorithmName ['trainrp']: training function name.
       keras core only:
       - optmin_alg [RMSprop(lr=0.001, rho=0.9, epsilon=1e-08)]: optimizer.
       - costFunction ['mean_squared_error']: cost function. The legacy
         keyword 'binary_crossentropy' is still accepted as a fallback.
       - metrics [['accuracy']]: metrics stored in the train options.
       - shuffle [True]: shuffle flag stored in the train options.

     Remaining keywords are reported through checkForUnusedVars.
     """
     Logger.__init__(self, kw)
     self.references = ReferenceBenchmarkCollection([])
     coreframe = coreConf.core_framework()
     self.doPerf = retrieve_kw(kw, 'doPerf', True)
     # The default batch method depends on whether a batch size was given,
     # so inspect 'batchSize' before it is popped from kw.
     if 'batchSize' not in kw or kw['batchSize'] is NotSet:
         defaultBatchMethod = BatchSizeMethod.MinClassSize
     else:
         defaultBatchMethod = BatchSizeMethod.Manual
     self.batchMethod = BatchSizeMethod.retrieve(
         retrieve_kw(kw, 'batchMethod', defaultBatchMethod))
     self.batchSize = retrieve_kw(kw, 'batchSize', NotSet)
     epochs = retrieve_kw(kw, 'epochs', 10000)
     maxFail = retrieve_kw(kw, 'maxFail', 50)
     self.useTstEfficiencyAsRef = retrieve_kw(kw, 'useTstEfficiencyAsRef',
                                              False)
     self._merged = retrieve_kw(kw, 'merged', False)
     self.networks = retrieve_kw(kw, 'networks', NotSet)
     self.sortIdx = None
     if coreConf() is TuningToolCores.FastNet:
         seed = retrieve_kw(kw, 'seed', None)
         self._core = coreframe(level=LoggingLevel.toC(self.level),
                                seed=seed)
         self._core.trainFcn = retrieve_kw(kw, 'algorithmName', 'trainrp')
         self._core.showEvo = retrieve_kw(kw, 'showEvo', 50)
         self._core.multiStop = retrieve_kw(kw, 'doMultiStop', True)
         self._core.epochs = epochs
         self._core.maxFail = maxFail
         # TODO Add properties
     elif coreConf() is TuningToolCores.keras:
         self._core = coreframe
         from keras import callbacks
         from keras.optimizers import RMSprop, SGD
         from TuningTools.keras_util.callbacks import PerformanceHistory
         self.trainOptions = dict()
         self.trainOptions['optmin_alg'] = retrieve_kw(
             kw, 'optmin_alg', RMSprop(lr=0.001, rho=0.9, epsilon=1e-08))
         # The cost function used to be looked up under the keyword
         # 'binary_crossentropy', so a user-supplied 'costFunction' was never
         # honoured. Read the proper keyword and keep the old one as a
         # fallback for existing callers.
         legacyCostFunction = retrieve_kw(kw, 'binary_crossentropy',
                                          'mean_squared_error')
         self.trainOptions['costFunction'] = retrieve_kw(
             kw, 'costFunction', legacyCostFunction)
         self.trainOptions['metrics'] = retrieve_kw(kw, 'metrics', [
             'accuracy',
         ])
         self.trainOptions['shuffle'] = retrieve_kw(kw, 'shuffle', True)
         self._multiStop = retrieve_kw(kw, 'doMultiStop', True)
         self.trainOptions['nEpochs'] = epochs
         self.trainOptions['nFails'] = maxFail
         # FIXME The monitored quantity must change (e.g. to a SP-based one)
         self._earlyStopping = callbacks.EarlyStopping(
             monitor='val_loss',
             patience=self.trainOptions['nFails'],
             verbose=0,
             mode='auto')
         self._historyCallback = PerformanceHistory(
             display=retrieve_kw(kw, 'showEvo', 50))
     else:
         self._fatal("TuningWrapper not implemented for %s", coreConf)
     checkForUnusedVars(kw, self._debug)
     del kw
     # Set default empty values, whose type depends on the core in use:
     if coreConf() is TuningToolCores.keras:
         self._emptyData = npCurrent.fp_array([])
         self._emptyTarget = npCurrent.fp_array([[]]).reshape(
             npCurrent.access(pidx=1, oidx=0))
     elif coreConf() is TuningToolCores.FastNet:
         self._emptyData = list()
         self._emptyTarget = None
     self._emptyHandler = None
     # Set holders (all of them start as the shared empty placeholders):
     self._trnData = self._emptyData
     self._valData = self._emptyData
     self._tstData = self._emptyData
     self._trnHandler = self._emptyHandler
     self._valHandler = self._emptyHandler
     self._tstHandler = self._emptyHandler
     self._trnTarget = self._emptyTarget
     self._valTarget = self._emptyTarget
     self._tstTarget = self._emptyTarget
Example #21
0
  def __call__(self, **kw):
    """
      Generate the tuning job configuration files and store them in the
      output folder, one file per (neuron, sort, init) job window.

      Accepted keywords (any other key left in kw is handed to
      checkForUnusedVars together with self._warning):
        outputFolder   : folder receiving the files            ['jobConfig']
        neuronBounds   : neuron looping bounds                 [SeqLoopingBounds(5, 20)]
        sortBounds     : sort looping bounds                   [PythonLoopingBounds(50)]
        nInits         : total number of initializations       [100]
        nNeuronsPerJob : number of neurons per job             [1]
        nSortsPerJob   : number of sorts per job               [1]
        nInitsPerJob   : number of initializations per job     [100]
        compress       : argument forwarded to the save call   [True]
        prefix         : prefix of every generated file name   ['job']
        level          : when present, it is assigned to self.level
    """
    # --- Read the options -------------------------------------------------
    outDir         = retrieve_kw( kw, 'outputFolder', 'jobConfig' )
    neuronRange    = retrieve_kw( kw, 'neuronBounds', SeqLoopingBounds(5, 20) )
    sortRange      = retrieve_kw( kw, 'sortBounds', PythonLoopingBounds(50) )
    totalInits     = retrieve_kw( kw, 'nInits', 100 )
    neuronsPerJob  = retrieve_kw( kw, 'nNeuronsPerJob', 1 )
    sortsPerJob    = retrieve_kw( kw, 'nSortsPerJob', 1 )
    initsPerJob    = retrieve_kw( kw, 'nInitsPerJob', 100 )
    doCompress     = retrieve_kw( kw, 'compress', True )
    filePrefix     = retrieve_kw( kw, 'prefix', 'job' )

    if 'level' in kw:
      self.level = kw.pop('level')

    # Anything that is not already a SeqLoopingBounds gets wrapped:
    if not isinstance( neuronRange, SeqLoopingBounds ):
      neuronRange = SeqLoopingBounds( neuronRange )
    if not isinstance( sortRange, SeqLoopingBounds ):
      sortRange = PythonLoopingBounds( sortRange )

    # Report leftovers and drop the dict so it cannot be used by mistake:
    checkForUnusedVars( kw, self._warning )
    del kw

    # --- Sanity checks ----------------------------------------------------
    if nInitsIsInvalid(totalInits) if False else totalInits < 1:
      self._fatal( "Cannot require zero or negative initialization number.",
                   ValueError )

    neuronCount = len( neuronRange )
    sortCount   = len( sortRange )
    if not sortCount:
      self._fatal( "Sort bounds is empty." )

    # Requests larger than what is available are clamped (with a warning):
    if neuronsPerJob > neuronCount:
      self._warning( "The number of neurons per job (%d) is greater then "
                     "the total number of neurons (%d), changing it into "
                     "the maximum possible value.",
                     neuronsPerJob, neuronCount )
      neuronsPerJob = neuronCount
    if sortsPerJob > sortCount:
      self._warning( "The number of sorts per job (%d) is greater then "
                     "the total number of sorts (%d), changing it into "
                     "the maximum possible value.",
                     sortsPerJob, sortCount )
      sortsPerJob = sortCount

    # --- Prepare the output -----------------------------------------------
    mkdir_p( outDir )

    # Split each looping bound into the windows handled by a single job:
    splitter = CreateTuningJobFiles._retrieveJobLoopingBoundsCol
    neuronWindows = splitter( neuronRange, neuronsPerJob )
    sortWindows   = splitter( sortRange, sortsPerJob )
    initWindows   = splitter( PythonLoopingBounds( totalInits ), initsPerJob )

    # --- One configuration file per window combination --------------------
    for neuronWin in neuronWindows():
      for sortWin in sortWindows():
        for initWin in initWindows():
          neuronTag = neuronWin.formattedString('hn')
          sortTag   = sortWin.formattedString('s')
          initTag   = initWin.formattedString('i')
          self._debug( 'Retrieved following job configuration (bounds.vec) : '
                       '[ neuronBounds=%s, sortBounds=%s, initBounds=%s]',
                       neuronTag, sortTag, initTag )
          fulloutput = '{outputFolder}/{prefix}.{neuronStr}.{sortStr}.{initStr}'.format(
                        outputFolder = outDir,
                        prefix = filePrefix,
                        neuronStr = neuronTag,
                        sortStr = sortTag,
                        initStr = initTag )
          archive = TuningJobConfigArchieve( fulloutput,
                                             neuronBounds = neuronWin,
                                             sortBounds = sortWin,
                                             initBounds = initWin )
          savedFile = archive.save( doCompress )
          self._info( 'Saved job option configuration at path: %s', savedFile )
Example #22
0
    def __call__(self, **kwargs):
        """Draw the efficiency profiles of the selected configurations.

        Keyword arguments (any other key left in ``kwargs`` is handed to
        ``checkForUnusedVars`` together with ``self._logger.warning``):
            key: configuration selection string. When it contains no ':'
                it is treated as base64 and decoded first. When empty or
                ``None`` the options are displayed and the selection is
                read interactively.
            outputdir: folder, relative to the current working directory,
                used to build the output names (default ``'plots'``).
            drawsame: forwarded as ``drawSame`` to the first profile call
                (default ``True``).
            atlaslabel: forwarded to both profile calls
                (default ``'Internal'``).
            isbackground: selects the background y-axis limits
                (default ``False``).
            doRatio: forwarded to the first profile call (default ``False``).
            doEffLabel: forwarded to the second profile call
                (default ``True``).

        Returns:
            list: the items returned by every ``profile`` call, accumulated
            with ``list.extend``.
        """
        key = retrieve_kw(kwargs, 'key', None)
        outputdir = retrieve_kw(kwargs, 'outputdir', 'plots')
        drawSame = retrieve_kw(kwargs, 'drawsame', True)
        atlaslabel = retrieve_kw(kwargs, 'atlaslabel', 'Internal')
        isBackground = retrieve_kw(kwargs, 'isbackground', False)
        doRatio = retrieve_kw(kwargs, 'doRatio', False)
        doEffLabel = retrieve_kw(kwargs, 'doEffLabel', True)
        checkForUnusedVars(kwargs, self._logger.warning)

        import os
        localpath = os.getcwd() + '/' + outputdir
        try:
            os.mkdir(localpath)
        except OSError as err:
            # Still best effort (never raises), but only filesystem errors are
            # caught and the message now tells the two situations apart.
            if os.path.isdir(localpath):
                self._logger.warning('The director %s exist.', localpath)
            else:
                self._logger.warning('Could not create the directory %s: %s',
                                     localpath, err)

        import base64
        if key:
            if ':' not in key:
                # b64decode returns bytes; decode so that split(',') below
                # operates on text.
                key = base64.b64decode(key).decode('utf-8')
                self._logger.info('Translate key to: %s', key)
        else:
            self.display()
            # NOTE(review): on Python 2 the builtin `input` evaluates the
            # typed text -- confirm this file only targets Python 3.
            key = input('Write the configuration: ')
            # b64encode needs bytes and returns bytes; convert both ways so
            # the logged key is plain text that can be pasted back.
            encoded_key = base64.b64encode(key.encode('utf-8')).decode('ascii')
            self._logger.info(
                'Use this key %s if you want to reproduce this plot',
                encoded_key)
        cList = self.translate(key.split(','))

        # FIXME: The subdirs must be the same for all configuration
        subdirs = cList[0]['subdirs']

        background_limits = ((-0.3, 0.1), (-0.3, 0.4), (-0.3, 0.4), (-0.3, 0.4))
        figures = []

        for subdir in subdirs:
            eff_list = list()
            legends = list()
            algnames = None
            for c in cList:
                effObj = self._effReader(self._store[c['file']],
                                         c['algname'],
                                         basepath=c['basepath'],
                                         dirname=c['dirname'],
                                         inputname=subdir + '/',
                                         outputname=subdir + '/match_')
                eff_list.append(effObj)
                legends.append(c['legend'])
                if not algnames:
                    algnames = c['algname']
                else:
                    algnames += '_' + c['algname']

            # Keyword arguments shared by both drawings of this subdir.
            common = dict(
                ylabel='Trigger Efficiency',
                hist_names=['eff_et', 'eff_eta', 'eff_mu'],
                legend_header='',
                legend_prefix='',
                legends=legends,
                region_label='',
                oname=localpath + '/' + subdir + '_' + algnames,
                atlaslabel=atlaslabel,
            )

            # First drawing: honours the caller's drawSame / doRatio choices.
            # NOTE(review): doEffLabel is not forwarded here, only to the
            # second call below -- confirm that this is intended.
            if isBackground:
                ylimits = list(background_limits)
            else:
                ylimits = [(0.0, 1.4), (0.0, 1.2), (0.0, 1.4), (0.0, 1.4)]
            figures.extend(
                profile(eff_list,
                        ylimits=ylimits,
                        drawSame=drawSame,
                        doRatio=doRatio,
                        **common))

            # Second drawing: always separate pads with the ratio enabled.
            if isBackground:
                ylimits = list(background_limits)
            else:
                ylimits = [(0.0, 1.4), (0.0, 1.4), (0.0, 1.4), (0.0, 1.4)]
            figures.extend(
                profile(eff_list,
                        ylimits=ylimits,
                        drawSame=False,
                        doRatio=True,
                        doEffLabel=doEffLabel,
                        **common))

        for c in cList:
            self._effReader.gen_table(self._store[c['file']],
                                      c['algname'],
                                      c['subdirs'],
                                      basepath=c['basepath'],
                                      dirname=c['dirname'])

        return figures