Example #1
 def __call__( self, y_score, y_true = NotSet ):
   """
    output -> The output space generated by the classifier.
    target -> The targets which should be returned by the classifier.
   """
   if y_true is NotSet:
     self.sps        = npCurrent.fp_array( y_score[0] )
     self.pds        = npCurrent.fp_array( y_score[1] )
     self.pfs        = npCurrent.fp_array( y_score[2] )
     self.thresholds = npCurrent.fp_array( y_score[3] )
   else:
     # We have to determine what is signal and noise from the datasets using
     # the targets:
     try:
       from sklearn.metrics import roc_curve
     except ImportError:
        # FIXME: the ROC function previously used here could serve as a fallback
       raise ImportError("sklearn is not available, please install it.")
     self.pfs, self.pds, self.thresholds = roc_curve(y_true, y_score, pos_label=1, drop_intermediate=True)
     pds = self.pds
     bps = 1. - self.pfs
     self.sps = np.sqrt( ( pds  + bps )*.5 * np.sqrt( pds * bps ) )
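
For reference, a minimal standalone sketch of the same SP-index computation using only NumPy and scikit-learn; the npCurrent helper from the original class is left out, and the function name is illustrative only:

import numpy as np
from sklearn.metrics import roc_curve

def sp_from_scores(y_true, y_score):
    # ROC curve: false-positive rate (pf), detection probability (pd) and thresholds
    pfs, pds, thresholds = roc_curve(y_true, y_score, pos_label=1, drop_intermediate=True)
    bps = 1. - pfs  # background rejection
    # SP index combines the arithmetic and geometric means of pd and background rejection
    sps = np.sqrt((pds + bps) * .5 * np.sqrt(pds * bps))
    return sps, pds, pfs, thresholds

# e.g. sps, pds, pfs, thr = sp_from_scores([0, 0, 1, 1], [0.1, 0.4, 0.35, 0.8])
#      best_threshold = thr[np.argmax(sps)]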
Example #2
 def __discr_to_dict(self, model):
     """
 Transform discriminators to dictionary
 """
     if coreConf() is TuningToolCores.keras:
         hw, hb = model.get_layer(name='dense_2').get_weights()
         ow, ob = model.get_layer(name='dense_3').get_weights()
         discrDict = {
             'nodes':
             npCurrent.int_array([hw.shape[0], hw.shape[1], ow.shape[1]]),
             'weights':
             np.concatenate(
                 [hw.reshape(-1, order='F'),
                  ow.reshape(-1, order='F')]),
             'bias':
             np.concatenate(
                 [hb.reshape(-1, order='F'),
                  ob.reshape(-1, order='F')]),
         }
     elif coreConf() is TuningToolCores.FastNet:
         n = []
         w = []
         b = []
         for l in range(model.getNumLayers()):
             n.append(model.getNumNodes(l))
         for l in range(len(n) - 1):
             for j in range(n[l + 1]):
                 for k in range(n[l]):
                     w.append(model.getWeight(l, j, k))
                 b.append(model.getBias(l, j))
         discrDict = {
             'nodes': npCurrent.int_array(n),
             'weights': npCurrent.fp_array(w),
             'bias': npCurrent.fp_array(b)
         }
     self._debug('Extracted discriminator to raw dictionary.')
     return discrDict
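
The 'weights' and 'bias' entries above are flat, column-major (order='F') concatenations over the layers. A hedged sketch of how such a dictionary could be unpacked back into per-layer matrices, following the keras-branch layout (the helper name dict_to_layers is an assumption made for illustration):

import numpy as np

def dict_to_layers(discrDict):
    # Rebuild (weights, bias) pairs per layer from the flat dictionary.
    nodes = discrDict['nodes']                    # e.g. [n_input, n_hidden, n_output]
    w_flat, b_flat = discrDict['weights'], discrDict['bias']
    layers, w_pos, b_pos = [], 0, 0
    for n_in, n_out in zip(nodes[:-1], nodes[1:]):
        # Weights were flattened column-major, so restore them with the same order
        w = w_flat[w_pos:w_pos + n_in * n_out].reshape((n_in, n_out), order='F')
        b = b_flat[b_pos:b_pos + n_out]
        layers.append((w, b))
        w_pos += n_in * n_out
        b_pos += n_out
    return layers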
Example #3
    def revert(self, trnData, valData, tstData=None, **kw):
        """
      Revert sort using the training, validation and testing datasets.

      data = cross.revert( trnData, valData[, tstData=None], sort = sortValue)
    """
        from math import floor

        if not self.isRevertible():
            self._logger.fatal(
                "Attempted to revert CrossValidation method which reverse method was not implemented.",
                NotImplementedError)

        try:
            sort = kw.pop('sort')
        except KeyError:
            raise TypeError('Needed argument "sort" not specified')

        data = []

        if not tstData:
            tstData = [npCurrent.fp_array([]) for i in range(len(trnData))]
        for cTrnData, cValData, cTstData in zip(trnData, valData, tstData):
            # Retrieve total number of events:
            evts = cTrnData.shape[npCurrent.odim] \
                 + cValData.shape[npCurrent.odim] \
                 + (cTstData.shape[npCurrent.odim] if cTstData.size else 0)
            # Allocate the numpy array to hold
            cData = npCurrent.fp_zeros(shape=npCurrent.shape(
                npat=cTrnData.shape[npCurrent.pdim], nobs=evts))
            # Calculate the remainder when we do equal splits in nBoxes:
            remainder = evts % self._nBoxes
            # The number of events in each split box (kept as an int so it can be used in slices):
            evtsPerBox = int(floor(evts / self._nBoxes))
            # Create a holder for the remainder events, which must be placed at the end
            # of the data array:
            remainderData = npCurrent.fp_zeros(shape=npCurrent.shape(
                npat=cTrnData.shape[npCurrent.pdim], nobs=remainder))
            for boxIdx in range(self._nBoxes):
                # Get the indexes where we will put our data in cData:
                cStartPos = boxIdx * evtsPerBox
                cEndPos = cStartPos + evtsPerBox
                # And get the indexes and dataset where we will copy the values from:
                startPos, endPos, ds = self.getBoxPosition(
                    sort,
                    boxIdx,
                    cTrnData,
                    cValData,
                    cTstData,
                    evtsPerBox=evtsPerBox,
                    remainder=remainder)
                # Copy this box values to data:
                cData[npCurrent.access(
                    pidx=':', oidx=slice(cStartPos,
                                         cEndPos))] = ds[npCurrent.access(
                                             pidx=':',
                                             oidx=slice(startPos, endPos))]
                # We also want to copy this box remainder if it exists to the remainder
                # data:
                if boxIdx < remainder:
                    # Take the row added to the end of dataset:
                    remainderData[npCurrent.access(
                        pidx=':',
                        oidx=boxIdx)] = ds[npCurrent.access(pidx=':',
                                                            oidx=endPos)]
            # We finished looping over the boxes, now we copy the remainder data to
            # the last positions of our original data np.array:
            if remainder:
                cData[npCurrent.access(pidx=':',
                                       oidx=slice(evtsPerBox * self._nBoxes,
                                                  None))] = remainderData
            # Finally, append the numpy array holding this class information to the
            # data list:
            data.append(cData)
        return data
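
A rough standalone illustration of the box-reverting idea on plain 1-D NumPy arrays; getBoxPosition, npCurrent and the remainder handling are left out, and all names below are made up for the example:

import numpy as np

def revert_boxes(boxes, box_order):
    # boxes     -- list of 1-D arrays, one per box, in their shuffled (sorted) order
    # box_order -- box_order[i] gives the original position of the box currently at index i
    original = [None] * len(boxes)
    for current_pos, original_pos in enumerate(box_order):
        original[original_pos] = boxes[current_pos]
    return np.concatenate(original)

# e.g. revert_boxes([np.array([3., 4.]), np.array([1., 2.])], box_order=[1, 0])
#      -> array([1., 2., 3., 4.])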
Example #4
  def __call__( self, fList, ringerOperation, **kw):
    """
      Read ntuple and return patterns and efficiencies.
      Arguments:
        - fList: The file path or file list path. It can be given in any of the
          following forms:
          o List: each element is a string path to a file;
          o Comma separated string: each path is separated by a comma;
          o Folders: expanded recursively, adding the files within them to the analysis.
        - ringerOperation: Set the operation type. It can be either a string or a
          RingerOperation value.
      Optional arguments:
        - filterType [None]: whether to filter. Use FilterType enumeration
        - reference [Truth]: set reference for targets. Use Reference enumeration
        - treePath [Set using operation]: set the tree name in the file; this may be
          set to use sources other than the default.
            Default for:
              o Offline: Offline/Egamma/Ntuple/electron
              o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH
        - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger
        - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger
        - offEtCut [None]: Set Offline cluster energy cut value
        - nClusters [None]: Read up to nClusters. Use None to run for all clusters.
        - getRatesOnly [False]: Whether to only retrieve the benchmark efficiencies
          (rates), without extracting the patterns.
        - etBins [None]: E_T bins (GeV) where the data should be segmented
        - etaBins [None]: eta bins where the data should be segmented
        - ringConfig [100]: A list containing the number of rings available in the data
          for each eta bin.
        - crossVal [None]: Whether to measure the benchmark efficiency split by the
          cross-validation datasets
        - extractDet [None]: Which detector to export (use Detector enumeration).
          Defaults are:
            o L2Calo: Calorimetry
            o L2: Tracking
            o Offline: Calorimetry
            o Others: CaloAndTrack
        - standardCaloVariables [False]: Whether to extract the standard calorimeter
          variables instead of the ring energies.
        - useTRT [False]: Whether to export TRT information when dumping track
          variables.
        - supportTriggers [True]: Whether the data being read comes from support triggers
    """
    # Offline information branches:
    __offlineBranches = ['el_et',
                         'el_eta',
                         #'el_loose',
                         #'el_medium',
                         #'el_tight',
                         'el_lhLoose',
                         'el_lhMedium',
                         'el_lhTight',
                         'mc_hasMC',
                         'mc_isElectron',
                         'mc_hasZMother',
                         'el_nPileupPrimaryVtx',
                         ]
    # Online information branches
    __onlineBranches = []
    __l2stdCaloBranches = ['trig_L2_calo_et',
                           'trig_L2_calo_eta',
                           'trig_L2_calo_phi',
                           'trig_L2_calo_e237', # rEta
                           'trig_L2_calo_e277', # rEta
                           'trig_L2_calo_fracs1', # F1: fraction sample 1
                           'trig_L2_calo_weta2', # weta2
                           'trig_L2_calo_ehad1', # energy on hadronic sample 1
                           'trig_L2_calo_emaxs1', # eratio
                           'trig_L2_calo_e2tsts1', # eratio
                           'trig_L2_calo_wstot',] # wstot
    __l2trackBranches = [ # Do not add non-pattern variables to this branch list
                         #'trig_L2_el_pt',
                         #'trig_L2_el_eta',
                         #'trig_L2_el_phi',
                         #'trig_L2_el_caloEta',
                         #'trig_L2_el_charge',
                         #'trig_L2_el_nTRTHits',
                         #'trig_L2_el_nTRTHiThresholdHits',
                         'trig_L2_el_etOverPt',
                         'trig_L2_el_trkClusDeta',
                         'trig_L2_el_trkClusDphi',]
    # Retrieve information from keyword arguments
    filterType            = retrieve_kw(kw, 'filterType',            FilterType.DoNotFilter )
    reference             = retrieve_kw(kw, 'reference',             Reference.Truth        )
    l1EmClusCut           = retrieve_kw(kw, 'l1EmClusCut',           None                   )
    l2EtCut               = retrieve_kw(kw, 'l2EtCut',               None                   )
    efEtCut               = retrieve_kw(kw, 'efEtCut',               None                   )
    offEtCut              = retrieve_kw(kw, 'offEtCut',              None                   )
    treePath              = retrieve_kw(kw, 'treePath',              None                   )
    nClusters             = retrieve_kw(kw, 'nClusters',             None                   )
    getRates              = retrieve_kw(kw, 'getRates',              True                   )
    getRatesOnly          = retrieve_kw(kw, 'getRatesOnly',          False                  )
    etBins                = retrieve_kw(kw, 'etBins',                None                   )
    etaBins               = retrieve_kw(kw, 'etaBins',               None                   )
    crossVal              = retrieve_kw(kw, 'crossVal',              None                   )
    ringConfig            = retrieve_kw(kw, 'ringConfig',            100                    )
    extractDet            = retrieve_kw(kw, 'extractDet',            None                   )
    standardCaloVariables = retrieve_kw(kw, 'standardCaloVariables', False                  )
    useTRT                = retrieve_kw(kw, 'useTRT',                False                  )
    supportTriggers       = retrieve_kw(kw, 'supportTriggers',       True                   )
    monitoring            = retrieve_kw(kw, 'monitoring',            None                   )
    pileupRef             = retrieve_kw(kw, 'pileupRef',             NotSet                 )
    import ROOT, pkgutil
    #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
    #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
    if not( bool( pkgutil.find_loader( 'libTuningTools' ) ) and ROOT.gSystem.Load('libTuningTools') >= 0 ) and \
       not( bool( pkgutil.find_loader( 'libTuningToolsLib' ) ) and ROOT.gSystem.Load('libTuningToolsLib') >= 0 ):
        #ROOT.gSystem.Load('libTuningToolsPythonLib') < 0:
      self._fatal("Could not load TuningTools library", ImportError)

    if 'level' in kw: self.level = kw.pop('level')
    # and delete it to avoid mistakes:
    checkForUnusedVars( kw, self._warning )
    del kw
    ### Parse arguments
    # Mutual exclusive arguments:
    if not getRates and getRatesOnly:
      self._logger.error("Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True.")
      getRates = True
    # Also parse operation, check if its type is string and if we can
    # transform it to the known operation enum:
    fList = csvStr2List ( fList )
    fList = expandFolders( fList )
    ringerOperation = RingerOperation.retrieve(ringerOperation)
    reference = Reference.retrieve(reference)
    if isinstance(l1EmClusCut, str):
      l1EmClusCut = float(l1EmClusCut)
    if l1EmClusCut:
      l1EmClusCut = 1000.*l1EmClusCut # Put energy in MeV
      __onlineBranches.append( 'trig_L1_emClus'  )
    if l2EtCut:
      l2EtCut = 1000.*l2EtCut # Put energy in MeV
      __onlineBranches.append( 'trig_L2_calo_et' )
    if efEtCut:
      efEtCut = 1000.*efEtCut # Put energy in MeV
      __onlineBranches.append( 'trig_EF_calo_et' )
    if offEtCut:
      offEtCut = 1000.*offEtCut # Put energy in MeV
      __offlineBranches.append( 'el_et' )
    if not supportTriggers:
      __onlineBranches.append( 'trig_L1_accept' )
    # Check if treePath is None and try to set it automatically
    if treePath is None:
      treePath = 'Offline/Egamma/Ntuple/electron' if ringerOperation < 0 else \
                 'Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH'
    # Check whether using bins
    useBins=False; useEtBins=False; useEtaBins=False
    nEtaBins = 1; nEtBins = 1
    # Set the detector which we should extract the information:
    if extractDet is None:
      if ringerOperation < 0:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2Calo:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2:
        extractDet = Detector.Tracking
      else:
        extractDet = Detector.CaloAndTrack
    else:
      extractDet = Detector.retrieve( extractDet )

    if etaBins is None: etaBins = npCurrent.fp_array([])
    if type(etaBins) is list: etaBins=npCurrent.fp_array(etaBins)
    if etBins is None: etBins = npCurrent.fp_array([])
    if type(etBins) is list: etBins=npCurrent.fp_array(etBins)

    if etBins.size:
      etBins = etBins * 1000. # Put energy in MeV
      nEtBins  = len(etBins)-1
      if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of et bins (%d) is larger than or equal to the maximum '
            'that the integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      # Flag that we are separating data through bins
      useBins=True
      useEtBins=True
      self._debug('E_T bins enabled.')

    if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
      ringConfig = [ringConfig] * ((len(etaBins) - 1) if etaBins.size else 1)
    if type(ringConfig) is list: ringConfig=npCurrent.int_array(ringConfig)
    if not len(ringConfig):
      self._fatal('Rings size must be specified.');

    if etaBins.size:
      nEtaBins = len(etaBins)-1
      if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of eta bins (%d) is larger than or equal to the maximum '
            'that the integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtaBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      if len(ringConfig) != nEtaBins:
        self._fatal(('The number of ring configurations (%r) must be equal to the '
                            'number of eta bins (%r) in the region config'), ringConfig, etaBins)
      useBins=True
      useEtaBins=True
      self._debug('eta bins enabled.')
    else:
      self._debug('eta/et bins disabled.')

    ### Prepare to loop:
    # Open root file
    t = ROOT.TChain(treePath)
    for inputFile in progressbar(fList, len(fList),
                                 logger = self._logger,
                                 prefix = "Creating collection tree "):

      # Check if file exists
      f  = ROOT.TFile.Open(inputFile, 'read')
      if not f or f.IsZombie():
        self._warning("Couldn't open file: %s", inputFile)
        continue
      # Inform user whether TTree exists, and which options are available:
      self._debug("Adding file: %s", inputFile)
      obj = f.Get(treePath)
      if not obj:
        self._warning("Couldn't retrieve TTree (%s)!", treePath)
        self._info("File available info:")
        f.ReadAll()
        f.ReadKeys()
        f.ls()
        continue
      elif not isinstance(obj, ROOT.TTree):
        self._fatal("%s is not an instance of TTree!", treePath, ValueError)
      t.Add( inputFile )

    # Turn all branches off.
    t.SetBranchStatus("*", False)

    # RingerPhysVal holds the addresses of the required branches
    event = ROOT.RingerPhysVal()

    # Add offline branches, these are always needed
    cPos = 0
    for var in __offlineBranches:
      self.__setBranchAddress(t,var,event)

    # Add online branches if using Trigger
    if ringerOperation > 0:
      for var in __onlineBranches:
        self.__setBranchAddress(t,var,event)


    ## Allocating memory for the number of entries
    entries = t.GetEntries()
    nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                else nClusters

    ## Retrieve the dependent operation variables:
    if useEtBins:
      etBranch = 'el_et' if ringerOperation < 0 else 'trig_L2_calo_et'
      self.__setBranchAddress(t,etBranch,event)
      self._debug("Added branch: %s", etBranch)
      if not getRatesOnly:
        npEt    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEt    with size %r", npEt.shape)

    if useEtaBins:
      etaBranch    = "el_eta" if ringerOperation < 0 else "trig_L2_calo_eta"
      self.__setBranchAddress(t,etaBranch,event)
      self._debug("Added branch: %s", etaBranch)
      if not getRatesOnly:
        npEta    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEta   with size %r", npEta.shape)

    # The base information holder, such as et, eta and pile-up
    if pileupRef is NotSet:
      if ringerOperation > 0:
        pileupRef = PileupReference.avgmu
      else:
        pileupRef = PileupReference.nvtx

    pileupRef = PileupReference.retrieve( pileupRef )

    self._info("Using '%s' as pile-up reference.", PileupReference.tostring( pileupRef ) )

    if pileupRef is PileupReference.nvtx:
      pileupBranch = 'el_nPileupPrimaryVtx'
      pileupDataType = np.uint16
    elif pileupRef is PileupReference.avgmu:
      pileupBranch = 'avgmu'
      pileupDataType = np.float32
    else:
      raise NotImplementedError("Pile-up reference %r is not implemented." % pileupRef)
    baseInfoBranch = BaseInfo((etBranch, etaBranch,  pileupBranch, 'el_phi' if ringerOperation < 0 else 'trig_L2_el_phi',),
                              (npCurrent.fp_dtype, npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType) )
    baseInfo = [None, ] * baseInfoBranch.nInfo

    # Make sure all baseInfoBranch information is available:
    for idx in baseInfoBranch:
      self.__setBranchAddress(t,baseInfoBranch.retrieveBranch(idx),event)

    # Allocate numpy to hold as many entries as possible:
    if not getRatesOnly:
      # Retrieve the rings information depending on ringer operation
      ringerBranch = "el_ringsE" if ringerOperation < 0 else \
                     "trig_L2_calo_rings"
      self.__setBranchAddress(t,ringerBranch,event)
      if ringerOperation > 0:
        if ringerOperation is RingerOperation.L2:
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
      if standardCaloVariables:
        if ringerOperation in (RingerOperation.L2, RingerOperation.L2Calo,):
          for var in __l2stdCaloBranches:
            self.__setBranchAddress(t, var, event)
        else:
          self._warning("Unknown standard calorimeters for Operation:%s. Setting operation back to use rings variables.",
                               RingerOperation.tostring(ringerOperation))
      t.GetEntry(0)
      npat = 0
      if extractDet in (Detector.Calorimetry,
                        Detector.CaloAndTrack,
                        Detector.All):
        if standardCaloVariables:
          npat+= 6
        else:
          npat += ringConfig.max()
      if extractDet in (Detector.Tracking,
                       Detector.CaloAndTrack,
                       Detector.All):
        if ringerOperation is RingerOperation.L2:
          if useTRT:
            self._info("Using TRT information!")
            npat += 2
            __l2trackBranches.append('trig_L2_el_nTRTHits')
            __l2trackBranches.append('trig_L2_el_nTRTHiThresholdHits')
          npat += 3
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
          self.__setBranchAddress(t,"trig_L2_el_pt",event)
        elif ringerOperation < 0: # Offline
          self._warning("Still need to implement tracking for the ringer offline.")
      npPatterns = npCurrent.fp_zeros( shape=npCurrent.shape(npat=npat, #getattr(event, ringerBranch).size()
                                                   nobs=nobs)
                                     )
      self._debug("Allocated npPatterns with size %r", npPatterns.shape)

      # Add E_T, eta and luminosity information
      npBaseInfo = [npCurrent.zeros( shape=npCurrent.shape(npat=1, nobs=nobs ), dtype=baseInfoBranch.dtype(idx) )
                                    for idx in baseInfoBranch]
    else:
      npPatterns = npCurrent.fp_array([])
      npBaseInfo = [deepcopy(npCurrent.fp_array([])) for _ in baseInfoBranch]

    ## Allocate the branch efficiency collectors:
    if getRates:
      if ringerOperation < 0:
        benchmarkDict = OrderedDict(
          [(  RingerOperation.Offline_CutBased_Loose  , 'el_loose'            ),
           (  RingerOperation.Offline_CutBased_Medium , 'el_medium'           ),
           (  RingerOperation.Offline_CutBased_Tight  , 'el_tight'            ),
           (  RingerOperation.Offline_LH_Loose        , 'el_lhLoose'          ),
           (  RingerOperation.Offline_LH_Medium       , 'el_lhMedium'         ),
           (  RingerOperation.Offline_LH_Tight        , 'el_lhTight'          ),
          ])
      else:
        benchmarkDict = OrderedDict(
          [( RingerOperation.L2Calo                  , 'trig_L2_calo_accept' ),
           ( RingerOperation.L2                      , 'trig_L2_el_accept'   ),
           ( RingerOperation.EFCalo                  , 'trig_EF_calo_accept' ),
           ( RingerOperation.HLT                     , 'trig_EF_el_accept'   ),
          ])


      from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
      branchEffCollectors = OrderedDict()
      branchCrossEffCollectors = OrderedDict()
      for key, val in benchmarkDict.iteritems():
        branchEffCollectors[key] = list()
        branchCrossEffCollectors[key] = list()
        # Add efficiency branch:
        if getRates or getRatesOnly:
          self.__setBranchAddress(t,val,event)
        for etBin in range(nEtBins):
          if useBins:
            branchEffCollectors[key].append(list())
            branchCrossEffCollectors[key].append(list())
          for etaBin in range(nEtaBins):
            etBinArg = etBin if useBins else -1
            etaBinArg = etaBin if useBins else -1
            argList = [ RingerOperation.tostring(key), val, etBinArg, etaBinArg ]
            branchEffCollectors[key][etBin].append(BranchEffCollector( *argList ) )
            if crossVal:
              branchCrossEffCollectors[key][etBin].append(BranchCrossEffCollector( entries, crossVal, *argList ) )
          # etBin
        # etaBin
      # benchmark dict
      if self._logger.isEnabledFor( LoggingLevel.DEBUG ):
        self._debug( 'Retrieved following branch efficiency collectors: %r',
            [collector[0].printName for collector in traverse(branchEffCollectors.values())])
    # end of (getRates)

    etaBin = 0; etBin = 0
    step = int(entries/100) if int(entries/100) > 0 else 1
    ## Start loop!
    self._info("There is available a total of %d entries.", entries)

    for entry in progressbar(range(entries), entries,
                             step = step, logger = self._logger,
                             prefix = "Looping over entries "):

      #self._verbose('Processing eventNumber: %d/%d', entry, entries)
      t.GetEntry(entry)

      # Check whether the entry falls outside the energy region of interest (if so, it
      # is also ignored for the efficiency measurement)
      if event.el_et < offEtCut:
        self._verbose("Ignoring entry due to offline E_T cut.")
        continue
      # Add et distribution for all events

      if not monitoring is None:
        # Book all distributions before the event selection
        self.__fillHistograms(monitoring,filterType,event,False)

      if ringerOperation > 0:
        # Remove events which didn't pass L1_calo
        if not supportTriggers and not event.trig_L1_accept:
          #self._verbose("Ignoring entry due to L1Calo cut (trig_L1_accept = %r).", event.trig_L1_accept)
          continue
        if event.trig_L1_emClus  < l1EmClusCut:
          #self._verbose("Ignoring entry due to L1Calo E_T cut (%d < %r).", event.trig_L1_emClus, l1EmClusCut)
          continue
        if event.trig_L2_calo_et < l2EtCut:
          #self._verbose("Ignoring entry due to L2Calo E_T cut.")
          continue
        if  efEtCut is not None and event.trig_L2_calo_accept :
          # EF calo is a container, search for electron objects with et > cut
          trig_EF_calo_et_list = stdvector_to_list(event.trig_EF_calo_et)
          found=False
          for v in trig_EF_calo_et_list:
            if v < efEtCut:  found=True
          if found:
            #self._verbose("Ignoring entry due to EFCalo E_T cut.")
            continue

      # Set discriminator target:
      target = Target.Unknown
      if reference is Reference.Truth:
        if event.mc_isElectron and event.mc_hasZMother:
          target = Target.Signal
        elif not (event.mc_isElectron and (event.mc_hasZMother or event.mc_hasWMother) ):
          target = Target.Background
      elif reference is Reference.Off_Likelihood:
        if event.el_lhTight: target = Target.Signal
        elif not event.el_lhLoose: target = Target.Background
      elif reference is Reference.AcceptAll:
        target = Target.Signal if filterType is FilterType.Signal else Target.Background
      else:
        if event.el_tight: target = Target.Signal
        elif not event.el_loose: target = Target.Background

      # Run filter if it is defined
      if filterType and \
         ( (filterType is FilterType.Signal and target != Target.Signal) or \
           (filterType is FilterType.Background and target != Target.Background) or \
           (target == Target.Unknown) ):
        #self._verbose("Ignoring entry due to filter cut.")
        continue

      # Add et distribution for all events
      if not monitoring is None:
        # Book all distributions after the event selection
        self.__fillHistograms(monitoring,filterType,event,True)

      # Retrieve base information:
      for idx in baseInfoBranch:
        lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
        baseInfo[idx] = lInfo
        if not getRatesOnly: npBaseInfo[idx][cPos] = lInfo
      # Retrieve dependent operation region
      if useEtBins:
        etBin  = self.__retrieveBinIdx( etBins, baseInfo[0] )
      if useEtaBins:
        etaBin = self.__retrieveBinIdx( etaBins, np.fabs( baseInfo[1]) )


      # Check if bin is within range (when not using bins, this will always be true):
      if (etBin < nEtBins and etaBin < nEtaBins):
        # Retrieve patterns:
        if not getRatesOnly:
          if useEtBins:  npEt[cPos] = etBin
          if useEtaBins: npEta[cPos] = etaBin
          ## Retrieve calorimeter information:
          cPat = 0
          caloAvailable = True
          if extractDet in (Detector.Calorimetry,
                           Detector.CaloAndTrack,
                           Detector.All):
            if standardCaloVariables:
              patterns = []
              if ringerOperation is RingerOperation.L2Calo:
                from math import cosh
                cosh_eta = cosh( event.trig_L2_calo_eta )
                # second layer ratio between 3x7 7x7
                rEta = event.trig_L2_calo_e237 / event.trig_L2_calo_e277
                base = event.trig_L2_calo_emaxs1 + event.trig_L2_calo_e2tsts1
                # Ratio between first and second highest energy cells
                eRatio = ( event.trig_L2_calo_emaxs1 - event.trig_L2_calo_e2tsts1 ) / base if base > 0 else 0
                # ratio of energy in the first layer (hadronic particles should leave low energy)
                F1 = event.trig_L2_calo_fracs1 / ( event.trig_L2_calo_et * cosh_eta )
                # weta2 is calculated over the middle layer using 3 x 5
                weta2 = event.trig_L2_calo_weta2
                # wstot is calculated over the first layer using (typically) 20 strips
                wstot = event.trig_L2_calo_wstot
                # ratio between EM cluster and first hadronic layers:
                Rhad1 = ( event.trig_L2_calo_ehad1 / cosh_eta ) / event.trig_L2_calo_et
                # allocate patterns:
                patterns = [rEta, eRatio, F1, weta2, wstot, Rhad1]
                for pat in patterns:
                  npPatterns[npCurrent.access( pidx=cPat, oidx=cPos) ] = pat
                  cPat += 1
              # end of ringerOperation
            else:
              # Remove events without rings
              if getattr(event,ringerBranch).empty():
                caloAvailable = False
              # Retrieve rings:
              if caloAvailable:
                try:
                  patterns = stdvector_to_list( getattr(event,ringerBranch) )
                  lPat = len(patterns)
                  if lPat == ringConfig[etaBin]:
                    npPatterns[npCurrent.access(pidx=slice(cPat,ringConfig[etaBin]),oidx=cPos)] = patterns
                  else:
                    oldEtaBin = etaBin
                    if etaBin > 0 and ringConfig[etaBin - 1] == lPat:
                      etaBin -= 1
                    elif etaBin + 1 < len(ringConfig) and ringConfig[etaBin + 1] == lPat:
                      etaBin += 1
                    npPatterns[npCurrent.access(pidx=slice(cPat, ringConfig[etaBin]),oidx=cPos)] = patterns
                    self._warning(("Recovered event which should be within eta bin (%d: %r) "
                                          "but was found to be within eta bin (%d: %r). "
                                          "Its read eta value was of %f."),
                                          oldEtaBin, etaBins[oldEtaBin:oldEtaBin+2],
                                          etaBin, etaBins[etaBin:etaBin+2],
                                          np.fabs( getattr(event,etaBranch)))
                except ValueError:
                  self._logger.error(("Patterns size (%d) do not match expected "
                                    "value (%d). This event eta value is: %f, and ringConfig is %r."),
                                    lPat, ringConfig[etaBin], np.fabs( getattr(event,etaBranch)), ringConfig
                                    )
                  continue
              else:
                if extractDet is Detector.Calorimetry:
                  # Also display warning when extracting only calorimetry!
                  self._warning("Rings not available")
                  continue
                self._warning("Rings not available")
                continue
              cPat += ringConfig.max()
            # which calo variables
          # end of (extractDet needed calorimeter)
          # And track information:
          if extractDet in (Detector.Tracking,
                           Detector.CaloAndTrack,
                           Detector.All):
            if caloAvailable or extractDet is Detector.Tracking:
              if ringerOperation is RingerOperation.L2:
                # Retrieve nearest deta/dphi only, so we need to find which one is the nearest:
                if event.trig_L2_el_trkClusDeta.size():
                  clusDeta = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDeta ) )
                  clusDphi = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDphi ) )
                  bestTrackPos = int( np.argmin( clusDeta**2 + clusDphi**2 ) )
                  for var in __l2trackBranches:
                    npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = getattr(event, var)[bestTrackPos]
                    cPat += 1
                else:
                  #self._verbose("Ignoring entry due to track information not available.")
                  continue
                  #for var in __l2trackBranches:
                  #  npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = np.nan
                  #  cPat += 1
              elif ringerOperation < 0: # Offline
                pass
            # caloAvailable or only tracking
          # end of (extractDet needs tracking)
        # end of (getRatesOnly)

        ## Retrieve rates information:
        if getRates:
          for branch in branchEffCollectors.itervalues():
            if not useBins:
              branch.update(event)
            else:
              branch[etBin][etaBin].update(event)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.update(event)
              else:
                branchCross[etBin][etaBin].update(event)
        # end of (getRates)

        # We only increment if this cluster will be computed
        cPos += 1
      # end of (et/eta bins)

      # Limit the number of entries to nClusters if desired and possible:
      if not nClusters is None and cPos >= nClusters:
        break
    # for end

    ## Treat the rings information
    if not getRatesOnly:

      ## Remove not filled reserved memory space:
      if npPatterns.shape[npCurrent.odim] > cPos:
        npPatterns = np.delete( npPatterns, slice(cPos,None), axis = npCurrent.odim)

      ## Segment data over bins regions:
      # Also remove not filled reserved memory space:
      if useEtBins:
        npEt  = npCurrent.delete( npEt, slice(cPos,None))
      if useEtaBins:
        npEta = npCurrent.delete( npEta, slice(cPos,None))
      # Treat
      npObject = self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                  nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                  npPatterns, )
      data = [self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                                      nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                                      npData) for npData in npBaseInfo]
      npBaseInfo = npCurrent.array( data, dtype=np.object )
    else:
      npObject = npCurrent.array([], dtype=npCurrent.dtype)
    # not getRatesOnly

    if getRates:
      if crossVal:
        for etBin in range(nEtBins):
          for etaBin in range(nEtaBins):
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.finished()
              else:
                branchCross[etBin][etaBin].finished()

      # Print efficiency for each one for the efficiency branches analysed:
      for etBin in range(nEtBins) if useBins else range(1):
        for etaBin in range(nEtaBins) if useBins else range(1):
          for branch in branchEffCollectors.itervalues():
            lBranch = branch if not useBins else branch[etBin][etaBin]
            self._info('%s',lBranch)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              lBranchCross = branchCross if not useBins else branchCross[etBin][etaBin]
              lBranchCross.dump(self._debug, printSort = True,
                                 sortFcn = self._verbose)
          # for branch
        # for eta
      # for et
    # end of (getRates)

    outputs = []
    #if not getRatesOnly:
    outputs.extend((npObject, npBaseInfo))
    #if getRates:
    outputs.extend((branchEffCollectors, branchCrossEffCollectors))
    #outputs = tuple(outputs)
    return outputs
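
A hedged sketch of how a reader like this might be invoked; the reader instance name, file names and bin edges below are assumptions made purely for illustration:

# Hypothetical call; 'reader' stands for an instance of the class defining __call__ above.
etBins  = [15, 20, 30, 40, 50, 500000]        # E_T bin edges in GeV (converted to MeV internally)
etaBins = [0, 0.8, 1.37, 1.54, 2.5]           # |eta| bin edges

# npObject, npBaseInfo, effCollectors, crossEffCollectors = reader(
#     ['sample1.root', 'sample2.root'],       # fList
#     RingerOperation.L2Calo,                 # ringerOperation
#     filterType = FilterType.Signal,
#     reference  = Reference.Truth,
#     etBins     = etBins,
#     etaBins    = etaBins,
#     ringConfig = [100] * (len(etaBins) - 1),
#     nClusters  = None )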
Example #5
 def __retrieveBinIdx( self, bins, value ):
   return npCurrent.scounter_dtype.type(np.digitize(npCurrent.fp_array([value]), bins)[0]-1)
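
For reference, a minimal NumPy-only illustration of the same bin lookup; the bin edges here are made up:

import numpy as np

etaBins = np.array([0.0, 0.8, 1.37, 1.54, 2.5])
# np.digitize returns the 1-based index of the bin each value falls into,
# so subtracting 1 yields the 0-based bin index used to segment the data.
binIdx = np.digitize(np.array([1.1]), etaBins)[0] - 1   # -> 1, i.e. the [0.8, 1.37) bin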
Example #6
    def __call__(self, fList, ringerOperation, **kw):
        """
      Read ntuple and return patterns and efficiencies.
      Arguments:
        - fList: The file path or file list path. It can be given in any of the
          following forms:
          o List: each element is a string path to a file;
          o Comma separated string: each path is separated by a comma;
          o Folders: expanded recursively, adding the files within them to the analysis.
        - ringerOperation: Set the operation type. It can be either a string or a
          RingerOperation value.
      Optional arguments:
        - filterType [None]: whether to filter. Use FilterType enumeration
        - reference [Truth]: set reference for targets. Use Reference enumeration
        - treePath [Set using operation]: set the tree name in the file; this may be
          set to use sources other than the default.
            Default for:
              o Offline: Offline/Egamma/Ntuple/electron
              o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH
        - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger
        - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger
        - offEtCut [None]: Set Offline cluster energy cut value
        - nClusters [None]: Read up to nClusters. Use None to run for all clusters.
        - getRatesOnly [False]: Whether to only retrieve the benchmark efficiencies
          (rates), without extracting the patterns.
        - etBins [None]: E_T bins (GeV) where the data should be segmented
        - etaBins [None]: eta bins where the data should be segmented
        - ringConfig [100]: A list containing the number of rings available in the data
          for each eta bin.
        - crossVal [None]: Whether to measure the benchmark efficiency split by the
          cross-validation datasets
        - extractDet [None]: Which detector to export (use Detector enumeration).
          Defaults are:
            o L2Calo: Calorimetry
            o L2: Tracking
            o Offline: Calorimetry
            o Others: CaloAndTrack
        - standardCaloVariables [False]: Whether to extract the standard calorimeter
          variables instead of the ring energies.
        - useTRT [False]: Whether to export TRT information when dumping track
          variables.
        - supportTriggers [True]: Whether the data being read comes from support triggers
        """

        __eventBranches = [
            'EventNumber', 'RunNumber', 'RandomRunNumber', 'MCChannelNumber',
            'RandomLumiBlockNumber', 'MCPileupWeight', 'VertexZPosition',
            'Zcand_M', 'Zcand_pt', 'Zcand_eta', 'Zcand_phi', 'Zcand_y',
            'isTagTag'
        ]

        __trackBranches = [
            'elCand2_deltaeta1', 'elCand2_DeltaPOverP',
            'elCand2_deltaphiRescaled', 'elCand2_d0significance',
            'elCand2_trackd0pvunbiased', 'elCand2_eProbabilityHT'
        ]

        __monteCarloBranches = [
            'type',
            'origin',
            'originbkg',
            'typebkg',
            'isTruthElectronFromZ',
            'TruthParticlePdgId',
            'firstEgMotherPdgId',
            'TruthParticleBarcode',
            'firstEgMotherBarcode',
            'MotherPdgId',
            'MotherBarcode',
            'FirstEgMotherTyp',
            'FirstEgMotherOrigin',
            'dRPdgId',
        ]

        __onlineBranches = ['match', 'ringerMatch', 'ringer_rings']

        __offlineBranches = ['et', 'eta']

        # The current pid map used as offline reference
        pidConfigs = {
            key: value
            for key, value in RingerOperation.efficiencyBranches().iteritems()
            if key in (RingerOperation.Offline_LH_Tight,
                       RingerOperation.Offline_LH_Medium,
                       RingerOperation.Offline_LH_Loose,
                       RingerOperation.Offline_LH_VeryLoose)
        }

        # Retrieve information from keyword arguments
        filterType = retrieve_kw(kw, 'filterType', FilterType.DoNotFilter)
        reference = retrieve_kw(kw, 'reference', Reference.AcceptAll)
        offEtCut = retrieve_kw(kw, 'offEtCut', None)
        l2EtCut = retrieve_kw(kw, 'l2EtCut', None)
        treePath = retrieve_kw(kw, 'treePath', 'ZeeCandidate')
        nClusters = retrieve_kw(kw, 'nClusters', None)
        etBins = retrieve_kw(kw, 'etBins', None)
        etaBins = retrieve_kw(kw, 'etaBins', None)
        crossVal = retrieve_kw(kw, 'crossVal', None)
        ringConfig = retrieve_kw(kw, 'ringConfig', 100)
        monitoring = retrieve_kw(kw, 'monitoring', None)
        pileupRef = retrieve_kw(kw, 'pileupRef', NotSet)
        getRates = retrieve_kw(kw, 'getRates', True)
        getRatesOnly = retrieve_kw(kw, 'getRatesOnly', False)
        getTagsOnly = retrieve_kw(kw, 'getTagsOnly', False)
        extractDet = retrieve_kw(kw, 'extractDet', None)

        import ROOT
        #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
        #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
        if ROOT.gSystem.Load('libTuningTools') < 0:
            self._fatal("Could not load TuningTools library", ImportError)

        if 'level' in kw: self.level = kw.pop('level')
        # and delete it to avoid mistakes:
        checkForUnusedVars(kw, self._warning)
        del kw

        ### Parse arguments
        # Also parse operation, check if its type is string and if we can
        # transform it to the known operation enum:
        fList = csvStr2List(fList)
        fList = expandFolders(fList)
        ringerOperation = RingerOperation.retrieve(ringerOperation)
        reference = Reference.retrieve(reference)

        # Offline E_T cut
        if offEtCut:
            offEtCut = 1000. * offEtCut  # Put energy in MeV

        # Check whether using bins
        useBins = False
        useEtBins = False
        useEtaBins = False
        nEtaBins = 1
        nEtBins = 1

        if etaBins is None: etaBins = npCurrent.fp_array([])
        if type(etaBins) is list: etaBins = npCurrent.fp_array(etaBins)
        if etBins is None: etBins = npCurrent.fp_array([])
        if type(etBins) is list: etBins = npCurrent.fp_array(etBins)

        if etBins.size:
            etBins = etBins * 1000.  # Put energy in MeV
            nEtBins = len(etBins) - 1
            if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of et bins (%d) is larger than or equal to the maximum '
                    'that the integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            # Flag that we are separating data through bins
            useBins = True
            useEtBins = True
            self._debug('E_T bins enabled.')

        if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
            ringConfig = [ringConfig] * ((len(etaBins) - 1) if etaBins.size else 1)
        if type(ringConfig) is list:
            ringConfig = npCurrent.int_array(ringConfig)
        if not len(ringConfig):
            self._fatal('Rings size must be specified.')

        if etaBins.size:
            nEtaBins = len(etaBins) - 1
            if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of eta bins (%d) is larger than or equal to the maximum '
                    'that the integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtaBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            if len(ringConfig) != nEtaBins:
                self._fatal((
                    'The number of ring configurations (%r) must be equal to the '
                    'number of eta bins (%r) in the region config'), ringConfig, etaBins)
            useBins = True
            useEtaBins = True
            self._debug('eta bins enabled.')
        else:
            self._debug('eta/et bins disabled.')

        # The base information holder, such as et, eta and pile-up
        if pileupRef is NotSet:
            if ringerOperation > 0:
                pileupRef = PileupReference.avgmu
            else:
                pileupRef = PileupReference.nvtx

        pileupRef = PileupReference.retrieve(pileupRef)
        self._info("Using '%s' as pile-up reference.",
                   PileupReference.tostring(pileupRef))

        # Candidates: (1) is tags and (2) is probes. Default is probes
        self._candIdx = 1 if getTagsOnly else 2

        # Mutual exclusive arguments:
        if not getRates and getRatesOnly:
            self._logger.error(
                "Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True."
            )
            getRates = True

        ### Prepare to loop:
        t = ROOT.TChain(treePath)
        for inputFile in progressbar(fList,
                                     len(fList),
                                     logger=self._logger,
                                     prefix="Creating collection tree "):
            # Check if file exists
            f = ROOT.TFile.Open(inputFile, 'read')
            if not f or f.IsZombie():
                self._warning("Couldn't open file: %s", inputFile)
                continue
            # Inform user whether TTree exists, and which options are available:
            self._debug("Adding file: %s", inputFile)
            obj = f.Get(treePath)
            if not obj:
                self._warning("Couldn't retrieve TTree (%s)!", treePath)
                self._info("File available info:")
                f.ReadAll()
                f.ReadKeys()
                f.ls()
                continue
            elif not isinstance(obj, ROOT.TTree):
                self._fatal("%s is not an instance of TTree!", treePath,
                            ValueError)
            t.Add(inputFile)
        # Turn all branches off.
        t.SetBranchStatus("*", False)
        # SkimmedNtuple holds the addresses of the required branches
        event = ROOT.SkimmedNtuple()
        # Ready to retrieve the total number of events
        t.GetEntry(0)
        ## Allocating memory for the number of entries
        entries = t.GetEntries()
        nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                    else nClusters
        ## Retrieve the dependent operation variables:
        if useEtBins:
            etBranch = ('elCand%d_et') % (
                self._candIdx) if ringerOperation < 0 else ('fcCand%d_et') % (
                    self._candIdx)
            self.__setBranchAddress(t, etBranch, event)
            self._debug("Added branch: %s", etBranch)
            npEt = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEt    with size %r", npEt.shape)

        if useEtaBins:
            etaBranch = ('elCand%d_eta') % (
                self._candIdx) if ringerOperation < 0 else ('fcCand%d_eta') % (
                    self._candIdx)
            self.__setBranchAddress(t, etaBranch, event)
            self._debug("Added branch: %s", etaBranch)
            npEta = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEta   with size %r", npEta.shape)

        if reference is Reference.Truth:
            self.__setBranchAddress(t, ('elCand%d_isTruthElectronFromZ') %
                                    (self._candIdx), event)

        for var in __offlineBranches:
            self.__setBranchAddress(t, ('elCand%d_%s') % (self._candIdx, var),
                                    event)
        #for var in pidConfigs.values():
        #  self.__setBranchAddress(t,var,event)

        for var in __trackBranches:
            self.__setBranchAddress(t, var, event)

        # Add online branches if using Trigger
        if ringerOperation > 0:
            for var in __onlineBranches:
                self.__setBranchAddress(t,
                                        ('fcCand%d_%s') % (self._candIdx, var),
                                        event)
        else:
            self.__setBranchAddress(t, ('elCand%d_%s') %
                                    (self._candIdx, 'ringer_rings'), event)

        if pileupRef is PileupReference.nvtx:
            pileupBranch = 'Nvtx'
            pileupDataType = np.uint16
        elif pileupRef is PileupReference.avgmu:
            pileupBranch = 'averageIntPerXing'
            pileupDataType = np.float32
        else:
            raise NotImplementedError(
                "Pile-up reference %r is not implemented." % pileupRef)

        #for var in __eventBranches +
        for var in [pileupBranch]:
            self.__setBranchAddress(t, var, event)

        ### Allocate memory
        if extractDet == (Detector.Calorimetry):
            npat = ringConfig.max()
        elif extractDet == (Detector.Tracking):
            npat = len(__trackBranches)
        # NOTE: Check if pat is correct for both Calo and track data
        elif extractDet in (Detector.CaloAndTrack, Detector.All):
            npat = ringConfig.max() + len(__trackBranches)

        npPatterns = npCurrent.fp_zeros(shape=npCurrent.shape(
            npat=npat,  #getattr(event, ringerBranch).size()
            nobs=nobs))
        self._debug("Allocated npPatterns with size %r", npPatterns.shape)

        baseInfoBranch = BaseInfo(
            (etBranch, etaBranch, pileupBranch),
            (npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType))

        baseInfo = [
            None,
        ] * baseInfoBranch.nInfo
        # Add E_T, eta and luminosity information
        npBaseInfo = [
            npCurrent.zeros(shape=npCurrent.shape(npat=1, nobs=nobs),
                            dtype=baseInfoBranch.dtype(idx))
            for idx in baseInfoBranch
        ]

        from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
        branchEffCollectors = OrderedDict()
        branchCrossEffCollectors = OrderedDict()

        if ringerOperation < 0:
            from operator import itemgetter
            benchmarkDict = OrderedDict(
                sorted([(key, value) for key, value in
                        RingerOperation.efficiencyBranches().iteritems()
                        if key < 0 and not (isinstance(value, (list, tuple)))],
                       key=itemgetter(0)))
        else:
            benchmarkDict = OrderedDict()

        for key, val in benchmarkDict.iteritems():
            branchEffCollectors[key] = list()
            branchCrossEffCollectors[key] = list()
            # Add efficiency branch:
            if ringerOperation < 0:
                self.__setBranchAddress(t, val, event)

            for etBin in range(nEtBins):
                if useBins:
                    branchEffCollectors[key].append(list())
                    branchCrossEffCollectors[key].append(list())
                for etaBin in range(nEtaBins):
                    etBinArg = etBin if useBins else -1
                    etaBinArg = etaBin if useBins else -1
                    argList = [
                        RingerOperation.tostring(key), val, etBinArg, etaBinArg
                    ]
                    branchEffCollectors[key][etBin].append(
                        BranchEffCollector(*argList))
                    if crossVal:
                        branchCrossEffCollectors[key][etBin].append(
                            BranchCrossEffCollector(entries, crossVal,
                                                    *argList))
                # etBin
            # etaBin
        # benchmark dict

        if self._logger.isEnabledFor(LoggingLevel.DEBUG):
            self._debug(
                'Retrieved following branch efficiency collectors: %r', [
                    collector[0].printName
                    for collector in traverse(branchEffCollectors.values())
                ])

        etaBin = 0
        etBin = 0
        step = int(entries / 100) if int(entries / 100) > 0 else 1

        ## Start loop!
        self._info("There is available a total of %d entries.", entries)
        cPos = 0

        ### Loop over entries
        for entry in progressbar(range(entries),
                                 entries,
                                 step=step,
                                 logger=self._logger,
                                 prefix="Looping over entries "):

            self._verbose('Processing eventNumber: %d/%d', entry, entries)
            t.GetEntry(entry)

            #print self.__getEt(event)
            if event.elCand2_et < offEtCut:
                self._debug(
                    "Ignoring entry due to offline E_T cut. E_T = %1.3f < %1.3f MeV",
                    event.elCand2_et, offEtCut)
                continue
            # Add et distribution for all events

            if ringerOperation > 0:
                if event.fcCand2_et < l2EtCut:
                    self._debug("Ignoring entry due Fast Calo E_T cut.")
                    continue
                # Add et distribution for all events

            # Set discriminator target:
            target = Target.Unknown
            # Monte Carlo cuts
            if reference is Reference.Truth:
                if getattr(event, ('elCand%d_isTruthElectronFromZ') %
                           (self._candIdx)):
                    target = Target.Signal
                elif not getattr(event, ('elCand%d_isTruthElectronFromZ') %
                                 (self._candIdx)):
                    target = Target.Background
            # Offline Likelihood cuts
            elif reference is Reference.Off_Likelihood:
                if getattr(event,
                           pidConfigs[RingerOperation.Offline_LH_Tight]):
                    target = Target.Signal
                elif not getattr(
                        event,
                        pidConfigs[RingerOperation.Offline_LH_VeryLoose]):
                    target = Target.Background
            # By pass everything (Default)
            elif reference is Reference.AcceptAll:
                target = Target.Signal if filterType is FilterType.Signal else Target.Background

            # Run filter if it is defined
            if filterType and \
               ( (filterType is FilterType.Signal and target != Target.Signal) or \
                 (filterType is FilterType.Background and target != Target.Background) or \
                 (target == Target.Unknown) ):
                #self._verbose("Ignoring entry due to filter cut.")
                continue

            ## Retrieve base information and rings:
            for idx in baseInfoBranch:
                lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
                baseInfo[idx] = lInfo
            # Retrieve dependent operation region
            if useEtBins:
                etBin = self.__retrieveBinIdx(etBins, baseInfo[0])
            if useEtaBins:
                etaBin = self.__retrieveBinIdx(etaBins, np.fabs(baseInfo[1]))

            # Check if bin is within range (when not using bins, this will always be true):
            if (etBin < nEtBins and etaBin < nEtaBins):

                if useEtBins: npEt[cPos] = etBin
                if useEtaBins: npEta[cPos] = etaBin
                # Online operation
                cPat = 0
                caloAvailable = True
                if ringerOperation > 0 and self.__get_ringer_onMatch(
                        event) < 1:
                    continue
                # TODO Treat case where we don't use rings energy
                # Check whether the rings are empty
                if self.__get_rings_energy(event, ringerOperation).empty():
                    self._debug(
                        'No rings available in this event. Skipping...')
                    caloAvailable = False

                # Retrieve rings:
                if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack,
                                  Detector.All):
                    if caloAvailable:
                        try:
                            patterns = stdvector_to_list(
                                self.__get_rings_energy(
                                    event, ringerOperation))
                            lPat = len(patterns)
                            if lPat == ringConfig[etaBin]:
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                            else:
                                oldEtaBin = etaBin
                                if etaBin > 0 and ringConfig[etaBin -
                                                             1] == lPat:
                                    etaBin -= 1
                                elif etaBin + 1 < len(
                                        ringConfig) and ringConfig[etaBin +
                                                                   1] == lPat:
                                    etaBin += 1
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                                self._warning((
                                    "Recovered event which should be within eta bin (%d: %r) "
                                    "but was found to be within eta bin (%d: %r). "
                                    "Its read eta value was of %f."),
                                              oldEtaBin,
                                              etaBins[oldEtaBin:oldEtaBin + 2],
                                              etaBin,
                                              etaBins[etaBin:etaBin + 2],
                                              np.fabs(getattr(
                                                  event, etaBranch)))
                        except ValueError:
                            self._logger.error((
                                "Patterns size (%d) do not match expected "
                                "value (%d). This event eta value is: %f, and ringConfig is %r."
                            ), lPat, ringConfig[etaBin],
                                               np.fabs(
                                                   getattr(event, etaBranch)),
                                               ringConfig)
                            continue
                        cPat += ringConfig[etaBin]
                    else:
                        # Also display warning when extracting only calorimetry!
                        self._warning("Rings not available")
                        continue

                if extractDet in (Detector.Tracking, Detector.CaloAndTrack,
                                  Detector.All):
                    for var in __trackBranches:
                        npPatterns[npCurrent.access(pidx=cPat,
                                                    oidx=cPos)] = getattr(
                                                        event, var)
                        if var == 'elCand2_eProbabilityHT':
                            from math import log
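                            # Convert the TRT high-threshold probability into a
                            # scaled log-likelihood ratio: clamp it away from 0
                            # and 1 so the logarithm stays finite, then apply a
                            # logit transform with scale factor tau.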
                            TRT_PID = npPatterns[npCurrent.access(pidx=cPat,
                                                                  oidx=cPos)]
                            epsilon = 1e-99
                            if TRT_PID >= 1.0: TRT_PID = 1.0 - 1.e-15
                            elif TRT_PID <= 0.0: TRT_PID = epsilon
                            tau = 15.0
                            TRT_PID = -(1 / tau) * log((1.0 / TRT_PID) - 1.0)
                            npPatterns[npCurrent.access(pidx=cPat,
                                                        oidx=cPos)] = TRT_PID
                        cPat += 1

                ## Retrieve rates information:
                if getRates and ringerOperation < 0:
                    #event.elCand2_isEMVerLoose2015 = not( event.elCand2_isEMVeryLoose2015 & 34896 )
                    event.elCand2_isEMLoose2015 = not (
                        event.elCand2_isEMLoose2015 & 34896)
                    event.elCand2_isEMMedium2015 = not (
                        event.elCand2_isEMMedium2015 & 276858960)
                    event.elCand2_isEMTight2015 = not (
                        event.elCand2_isEMTight2015 & 281053264)

                    for branch in branchEffCollectors.values():
                        if not useBins:
                            branch.update(event)
                        else:
                            branch[etBin][etaBin].update(event)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.values():
                            if not useBins:
                                branchCross.update(event)
                            else:
                                branchCross[etBin][etaBin].update(event)
                # end of (getRates)

                if monitoring is not None:
                    self.__fillHistograms(monitoring, filterType, pileupRef,
                                          pidConfigs, event)

                # We only increment if this cluster will be computed
                cPos += 1
            # end of (et/eta bins)

            # Limit the number of entries to nClusters if desired and possible:
            if nClusters is not None and cPos >= nClusters:
                break
        # for end

        ## Treat the rings information
        ## Remove not filled reserved memory space:
        if npPatterns.shape[npCurrent.odim] > cPos:
            npPatterns = np.delete(npPatterns,
                                   slice(cPos, None),
                                   axis=npCurrent.odim)

        ## Segment data over bins regions:
        # Also remove not filled reserved memory space:
        if useEtBins:
            npEt = npCurrent.delete(npEt, slice(cPos, None))
        if useEtaBins:
            npEta = npCurrent.delete(npEta, slice(cPos, None))

        # Treat
        standardCaloVariables = False
        npObject = self.treatNpInfo(
            cPos,
            npEt,
            npEta,
            useEtBins,
            useEtaBins,
            nEtBins,
            nEtaBins,
            standardCaloVariables,
            ringConfig,
            npPatterns,
        )

        data = [
            self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins,
                             nEtaBins, standardCaloVariables, ringConfig,
                             npData) for npData in npBaseInfo
        ]
        npBaseInfo = npCurrent.array(data, dtype=object)

        if getRates:
            if crossVal:
                for etBin in range(nEtBins):
                    for etaBin in range(nEtaBins):
                        for branchCross in branchCrossEffCollectors.values():
                            if not useBins:
                                branchCross.finished()
                            else:
                                branchCross[etBin][etaBin].finished()

            # Print the efficiency for each of the analysed efficiency branches:
            for etBin in range(nEtBins) if useBins else range(1):
                for etaBin in range(nEtaBins) if useBins else range(1):
                    for branch in branchEffCollectors.values():
                        lBranch = branch if not useBins else branch[etBin][
                            etaBin]
                        self._info('%s', lBranch)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.values():
                            lBranchCross = branchCross if not useBins else branchCross[
                                etBin][etaBin]
                            lBranchCross.dump(self._debug,
                                              printSort=True,
                                              sortFcn=self._verbose)
                    # for branch
                # for eta
            # for et
        else:
            branchEffCollectors = None
            branchCrossEffCollectors = None
        # end of (getRates)

        outputs = []
        outputs.extend((npObject, npBaseInfo))
        if getRates:
            outputs.extend((branchEffCollectors, branchCrossEffCollectors))

        return outputs
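# The eProbabilityHT handling above can also be factored into a standalone
# helper. A minimal sketch, assuming only the clamping and logit transform shown
# in the tracking loop above; the function name and standalone form are
# illustrative and not part of TuningTools:
from math import log

def trt_pid_llr(e_probability_ht, tau=15.0, epsilon=1e-99):
    """Map a TRT HT probability onto a scaled log-likelihood ratio."""
    p = e_probability_ht
    # Keep the probability strictly inside (0, 1) so the logarithm is finite.
    if p >= 1.0:
        p = 1.0 - 1.e-15
    elif p <= 0.0:
        p = epsilon
    return -(1.0 / tau) * log((1.0 / p) - 1.0)

# Example: an HT probability of 0.85 maps to roughly 0.116.
print(trt_pid_llr(0.85))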
Example No. 7
    def trainC_Exp(self):
        """
      Train expert feedforward neural network
    """
        if coreConf() is TuningToolCores.ExMachina:
            self._fatal("Expert Neural Networks not implemented for ExMachina")
        elif coreConf() is TuningToolCores.FastNet:
            self._fatal("Expert Neural Networks not implemented for FastNet")
        elif coreConf() is TuningToolCores.keras:
            from copy import deepcopy

            # Set batch size:
            if self.batchMethod is BatchSizeMethod.MinClassSize:
                self.__batchSize(self._bkgSize if self._sgnSize > self._bkgSize
                                 else self._sgnSize)
            elif self.batchMethod is BatchSizeMethod.HalfSizeSignalClass:
                self.__batchSize(self._sgnSize // 2)
            elif self.batchMethod is BatchSizeMethod.OneSample:
                self.__batchSize(1)

            references = ['SP', 'Pd', 'Pf']

            # Holder of the discriminators:
            tunedDiscrList = []
            tuningInfo = {}

            for idx, ref in enumerate(references):
                rawDictTempl = {'discriminator': None, 'benchmark': None}

                history = self._model[ref].fit(
                    self._trnData,
                    self._trnTarget,
                    epochs=self.trainOptions['nEpochs'],
                    batch_size=self.batchSize,
                    callbacks=[self._historyCallback, self._earlyStopping],
                    # callbacks=[self._earlyStopping],
                    verbose=0,
                    validation_data=(self._valData, self._valTarget),
                    shuffle=self.trainOptions['shuffle'])
                # Retrieve raw network
                rawDictTempl['discriminator'] = self.__expDiscr_to_dict(
                    self._model[ref])
                rawDictTempl['benchmark'] = self.references[idx]
                tunedDiscrList.append(deepcopy(rawDictTempl))
                tuningInfo[ref] = DataTrainEvolution(history).toRawObj()

                try:
                    from sklearn.metrics import roc_curve
                except ImportError:
                    # FIXME Can use previous function that we used here as an alternative
                    raise ImportError(
                        "sklearn is not available, please install it.")

                # Retrieve performance:
                opRoc, tstRoc = Roc(), Roc()
                for idx, tunedDiscrDict in enumerate(tunedDiscrList):
                    discr = tunedDiscrDict['discriminator']
                    if self.doPerf:
                        self._debug('Retrieving performance for %s networks.' %
                                    (ref))
                        # propagate inputs:
                        trnOutput = self._model[ref].predict(self._trnData)
                        valOutput = self._model[ref].predict(self._valData)
                        tstOutput = self._model[ref].predict(
                            self._tstData
                        ) if self._tstData.size else npCurrent.fp_array([])
                        try:
                            allOutput = np.concatenate(
                                [trnOutput, valOutput, tstOutput])
                            allTarget = np.concatenate([
                                self._trnTarget, self._valTarget,
                                self._tstTarget
                            ])
                        except ValueError:
                            allOutput = np.concatenate([trnOutput, valOutput])
                            allTarget = np.concatenate(
                                [self._trnTarget, self._valTarget])
                        # Retrieve Rocs:
                        opRoc(allOutput, allTarget)
                        if self._tstData.size: tstRoc(tstOutput, self._tstTarget)
                        else: tstRoc(valOutput, self._valTarget)
                        # Add rocs to output information
                        # TODO Change this to raw object
                        tunedDiscrDict['summaryInfo'] = {
                            'roc_operation': opRoc.toRawObj(),
                            'roc_test': tstRoc.toRawObj()
                        }

                        for ref2 in self.references:
                            opPoint = opRoc.retrieve(ref2)
                            tstPoint = tstRoc.retrieve(ref2)
                            # Print information:
                            self._info(
                                '%s NETWORKS Operation (%s): sp = %f, pd = %f, pf = %f, thres = %f',
                                ref, ref2.name, opPoint.sp_value,
                                opPoint.pd_value, opPoint.pf_value,
                                opPoint.thres_value)
                            self._info(
                                '%s NETWORKS Test (%s): sp = %f, pd = %f, pf = %f, thres = %f',
                                ref, ref2.name, tstPoint.sp_value,
                                tstPoint.pd_value, tstPoint.pf_value,
                                tstPoint.thres_value)
                self._info("Finished trainC_Exp for %s networks." % (ref))

        self._debug("Finished trainC_Exp on python side.")

        return tunedDiscrList, tuningInfo
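# The performance block above builds an "operation" ROC from the concatenated
# training, validation and (when present) test sets. A minimal sketch of that
# pattern using sklearn's roc_curve directly; the helper name and the {0, 1}
# target labelling are assumptions for illustration, not TuningTools API:
import numpy as np
from sklearn.metrics import roc_curve

def operation_roc(trn_out, val_out, trn_tgt, val_tgt, tst_out=None, tst_tgt=None):
    """Concatenate the available datasets and compute the ROC curve."""
    outputs = [trn_out, val_out]
    targets = [trn_tgt, val_tgt]
    if tst_out is not None and len(tst_out):
        outputs.append(tst_out)
        targets.append(tst_tgt)
    y_score = np.concatenate(outputs).ravel()
    y_true = np.concatenate(targets).ravel()
    # pfs: false-alarm probability, pds: detection probability.
    pfs, pds, thresholds = roc_curve(y_true, y_score, pos_label=1)
    return pfs, pds, thresholds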
Example No. 8
    def train_c(self):
        """
      Train feedforward neural network
    """
        from copy import deepcopy
        # Holder of the discriminators:
        tunedDiscrList = []
        tuningInfo = {}

        # Set batch size:
        if self.batchMethod is BatchSizeMethod.MinClassSize:
            self.__batchSize(self._bkgSize if self._sgnSize > self._bkgSize
                             else self._sgnSize)
        elif self.batchMethod is BatchSizeMethod.HalfSizeSignalClass:
            self.__batchSize(self._sgnSize // 2)
        elif self.batchMethod is BatchSizeMethod.OneSample:
            self.__batchSize(1)

        rawDictTempl = {'discriminator': None, 'benchmark': None}

        if coreConf() is TuningToolCores.keras:
            history = self._model.fit(
                self._trnData,
                self._trnTarget,
                # epochs=self.trainOptions['nEpochs'],
                batch_size=self.batchSize,
                # callbacks=[self._historyCallback, self._earlyStopping],
                callbacks=[self._earlyStopping],
                verbose=0,
                validation_data=(self._valData, self._valTarget),
                shuffle=self.trainOptions['shuffle'])
            # Retrieve raw network
            rawDictTempl['discriminator'] = self.__discr_to_dict(self._model)
            rawDictTempl['benchmark'] = self.references[0]
            tunedDiscrList.append(deepcopy(rawDictTempl))
            tuningInfo = DataTrainEvolution(history).toRawObj()

            try:
                from sklearn.metrics import roc_curve
            except ImportError:
                # FIXME Can use previous function that we used here as an alternative
                raise ImportError(
                    "sklearn is not available, please install it.")

        elif coreConf() is TuningToolCores.FastNet:
            self._debug('executing train_c')
            [discriminatorPyWrapperList,
             trainDataPyWrapperList] = self._core.train_c()
            self._debug('finished train_c')
            # Transform the model into a list of dicts

            if self.doMultiStop:
                for idx, discr in enumerate(discriminatorPyWrapperList):
                    rawDictTempl['discriminator'] = self.__discr_to_dict(discr)
                    rawDictTempl['benchmark'] = self.references[idx]
                    # FIXME This will need to be improved if set to tune for multiple
                    # Pd and Pf values.
                    tunedDiscrList.append(deepcopy(rawDictTempl))
            else:
                rawDictTempl['discriminator'] = self.__discr_to_dict(
                    discriminatorPyWrapperList[0])
                rawDictTempl['benchmark'] = self.references[0]
                if self.useTstEfficiencyAsRef and self.sortIdx is not None:
                    rawDictTempl['sortIdx'] = self.sortIdx
                tunedDiscrList.append(deepcopy(rawDictTempl))
            tuningInfo = DataTrainEvolution(trainDataPyWrapperList).toRawObj()
            # TODO
        # cores

        # Retrieve performance:
        opRoc, tstRoc = Roc(), Roc()
        for idx, tunedDiscrDict in enumerate(tunedDiscrList):
            discr = tunedDiscrDict['discriminator']
            if self.doPerf:
                self._debug('Retrieving performance.')
                if coreConf() is TuningToolCores.keras:
                    # propagate inputs:
                    trnOutput = self._model.predict(self._trnData)
                    valOutput = self._model.predict(self._valData)
                    tstOutput = self._model.predict(
                        self._tstData
                    ) if self._tstData.size else npCurrent.fp_array([])
                    try:
                        allOutput = np.concatenate(
                            [trnOutput, valOutput, tstOutput])
                        allTarget = np.concatenate([
                            self._trnTarget, self._valTarget, self._tstTarget
                        ])
                    except ValueError:
                        allOutput = np.concatenate([trnOutput, valOutput])
                        allTarget = np.concatenate(
                            [self._trnTarget, self._valTarget])
                    # Retrieve Rocs:
                    opRoc(allOutput, allTarget)
                    if self._tstData.size: tstRoc(tstOutput, self._tstTarget)
                    else: tstRoc(valOutput, self._valTarget)
                elif coreConf() is TuningToolCores.FastNet:
                    perfList = self._core.valid_c(
                        discriminatorPyWrapperList[idx])
                    opRoc(perfList[1])
                    tstRoc(perfList[0])
                # Add rocs to output information
                # TODO Change this to raw object
                tunedDiscrDict['summaryInfo'] = {
                    'roc_operation': opRoc.toRawObj(),
                    'roc_test': tstRoc.toRawObj()
                }

                for ref in self.references:
                    if coreConf() is TuningToolCores.FastNet:
                        # FastNet does not loop over the references; this loop only matters for keras right now
                        ref = tunedDiscrDict['benchmark']

                    opPoint = opRoc.retrieve(ref)
                    tstPoint = tstRoc.retrieve(ref)
                    # Print information:
                    self._info(
                        'Operation (%s): sp = %f, pd = %f, pf = %f, thres = %f',
                        ref.name, opPoint.sp_value, opPoint.pd_value,
                        opPoint.pf_value, opPoint.thres_value)
                    self._info(
                        'Test (%s): sp = %f, pd = %f, pf = %f, thres = %f',
                        ref.name, tstPoint.sp_value, tstPoint.pd_value,
                        tstPoint.pf_value, tstPoint.thres_value)

                    if coreConf() is TuningToolCores.FastNet:
                        break

        self._debug("Finished train_c on python side.")

        return tunedDiscrList, tuningInfo
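# The batch-size selection at the top of train_c follows one rule per
# BatchSizeMethod value. A minimal standalone sketch of the same logic; the
# function and the string method names are illustrative, outside the
# TuningWrapper class:
def choose_batch_size(method, sgn_size, bkg_size):
    """Mirror the batch-size rules used by train_c and trainC_Exp."""
    if method == 'MinClassSize':
        # Size of the smaller class, so batches can stay balanced.
        return min(sgn_size, bkg_size)
    elif method == 'HalfSizeSignalClass':
        # Half of the signal-class size.
        return sgn_size // 2
    elif method == 'OneSample':
        # Fully stochastic updates: one sample per batch.
        return 1
    raise ValueError('Unknown batch-size method: %r' % (method,))

# Example: with 2500 signal and 4000 background samples, MinClassSize gives 2500.
print(choose_batch_size('MinClassSize', 2500, 4000))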
Example No. 9
 def __init__(self, **kw):
     Logger.__init__(self, kw)
     self.references = ReferenceBenchmarkCollection([])
     coreframe = coreConf.core_framework()
     self.doPerf = retrieve_kw(kw, 'doPerf', True)
     self.batchMethod = BatchSizeMethod.retrieve(
         retrieve_kw(kw, 'batchMethod',
                     BatchSizeMethod.MinClassSize
                     if 'batchSize' not in kw or kw['batchSize'] is NotSet
                     else BatchSizeMethod.Manual))
     self.batchSize = retrieve_kw(kw, 'batchSize', NotSet)
     epochs = retrieve_kw(kw, 'epochs', 10000)
     maxFail = retrieve_kw(kw, 'maxFail', 50)
     self.useTstEfficiencyAsRef = retrieve_kw(kw, 'useTstEfficiencyAsRef',
                                              False)
     self._merged = retrieve_kw(kw, 'merged', False)
     self.networks = retrieve_kw(kw, 'networks', NotSet)
     self.sortIdx = None
     if coreConf() is TuningToolCores.FastNet:
         seed = retrieve_kw(kw, 'seed', None)
         self._core = coreframe(level=LoggingLevel.toC(self.level),
                                seed=seed)
         self._core.trainFcn = retrieve_kw(kw, 'algorithmName', 'trainrp')
         self._core.showEvo = retrieve_kw(kw, 'showEvo', 50)
         self._core.multiStop = retrieve_kw(kw, 'doMultiStop', True)
         self._core.epochs = epochs
         self._core.maxFail = maxFail
         # TODO Add properties
     elif coreConf() is TuningToolCores.keras:
         self._core = coreframe
         from keras import callbacks
         from keras.optimizers import RMSprop, SGD
         from TuningTools.keras_util.callbacks import PerformanceHistory
         self.trainOptions = dict()
         self.trainOptions['optmin_alg'] = retrieve_kw(
             kw, 'optmin_alg', RMSprop(lr=0.001, rho=0.9, epsilon=1e-08))
         #self.trainOptions['optmin_alg']    = retrieve_kw( kw, 'optmin_alg',    SGD(lr=0.1, decay=1e-6, momentum=0.7)  )
         self.trainOptions['costFunction'] = retrieve_kw(
             kw, 'costFunction', 'mean_squared_error'
         )  # e.g. 'binary_crossentropy' or 'mean_squared_error'
         self.trainOptions['metrics'] = retrieve_kw(kw, 'metrics', [
             'accuracy',
         ])
         self.trainOptions['shuffle'] = retrieve_kw(kw, 'shuffle', True)
         self._multiStop = retrieve_kw(kw, 'doMultiStop', True)
         self.trainOptions['nEpochs'] = epochs
         self.trainOptions['nFails'] = maxFail
         #self._earlyStopping = callbacks.EarlyStopping( monitor='val_Tuning_L2Calo_SP_sp_value' #  val_loss, self.trainOptions['metrics'][0] FIXME This must change
         #                                             , patience=self.trainOptions['nFails']
         #                                             , verbose=0
         #                                             , mode='max')
         self._earlyStopping = callbacks.EarlyStopping(
             monitor='val_loss',  # alternatively: 'val_acc'
             patience=self.trainOptions['nFails'],
             verbose=0,
             mode='auto')
         self._historyCallback = PerformanceHistory(
             display=retrieve_kw(kw, 'showEvo', 50))
     else:
         self._fatal("TuningWrapper not implemented for %s", coreConf)
     checkForUnusedVars(kw, self._debug)
     del kw
     # Set default empty values:
     if coreConf() is TuningToolCores.keras:
         self._emptyData = npCurrent.fp_array([])
     elif coreConf() is TuningToolCores.FastNet:
         self._emptyData = list()
     self._emptyHandler = None
     if coreConf() is TuningToolCores.keras:
         self._emptyTarget = npCurrent.fp_array([[]]).reshape(
             npCurrent.access(pidx=1, oidx=0))
     elif coreConf() is TuningToolCores.FastNet:
         self._emptyTarget = None
     # Set holders:
     self._trnData = self._emptyData
     self._valData = self._emptyData
     self._tstData = self._emptyData
     self._trnHandler = self._emptyHandler
     self._valHandler = self._emptyHandler
     self._tstHandler = self._emptyHandler
     self._trnTarget = self._emptyTarget
     self._valTarget = self._emptyTarget
     self._tstTarget = self._emptyTarget
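# The keras branch above wires early stopping to the validation loss with a
# patience equal to maxFail. A minimal sketch of an equivalent standalone
# configuration, assuming the same default (maxFail = 50); the exact import
# path may differ between keras versions:
from keras import callbacks

max_fail = 50
early_stopping = callbacks.EarlyStopping(monitor='val_loss',
                                         patience=max_fail,
                                         verbose=0,
                                         mode='auto')
# Pass it to model.fit(..., callbacks=[early_stopping], validation_data=...) so
# training stops once val_loss has not improved for `patience` epochs.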