Example #1
0
 def auto(self):
     """
     Try to determine the dataframe without an explicit user choice.

     The command line is consulted first through ``dataframeParser``: when it
     carries a ``data_framework`` value, that value is adopted and the parsed
     options are stripped from ``sys.argv``. If the instance is still not
     configured afterwards, the sample stored in ``self._sample`` is
     inspected instead:

       - dict: the first key iterated selects ``SkimmedNtuple`` when it
         contains ``'elCand2_'`` and ``PhysVal`` otherwise;
       - non-empty list: its first element is expanded into a file list and
         each file is handed to ``self._checkFile``;
       - string: a directory is expanded into its files, anything else is
         checked as a single file.

     ``self._fatal`` is called when there is no sample to inspect or when the
     inspection does not leave the instance configured.
     """
     self._debug("Using automatic configuration for dataframe specification.")

     # First option: the dataframe may have been requested on the command line.
     from TuningTools.parsers.BaseModuleParser import dataframeParser
     import sys
     try:
         parsed, remaining = dataframeParser.parse_known_args()
         if args_has_dataframe(parsed):
             self.dataframe = parsed.data_framework
             # Consume option
             sys.argv = sys.argv[:1] + remaining
     except (ArgumentError, ValueError) as err:
         # Best effort only: a parsing problem must not stop auto-configuration.
         self._debug("Ignored argument parsing error:\n %s", err)

     from RingerCore import csvStr2List, expandFolders

     # Nothing left to do when the parser already settled the matter.
     if self.configured():
         return

     if not self.can_autoconfigure():
         self._fatal(
             "Cannot auto-configure which dataframe to use because no sample was specified via the auto_retrieve_sample() method."
         )
         return

     def probe(candidates):
         # Check files one by one, stopping at the first that configures us.
         for candidate in candidates:
             self._checkFile(candidate)
             if self.configured():
                 break

     if isinstance(self._sample, dict):
         # Only the first key reached by iteration is looked at.
         for key in self._sample:
             self.dataframe = (DataframeEnum.SkimmedNtuple
                               if 'elCand2_' in key else
                               DataframeEnum.PhysVal)
             break
     elif self._sample and isinstance(self._sample, list):
         if not isinstance(self._sample[0], basestring):
             self._fatal(
                 "Cannot autoconfigure dataframe using the following list: %r",
                 self._sample)
         probe(expandFolders(csvStr2List(self._sample[0])))
     elif isinstance(self._sample, basestring):
         if os.path.isdir(self._sample):
             probe(expandFolders(self._sample))
         else:
             self._checkFile(self._sample)

     if not self.configured():
         self._fatal("Couldn't autoconfigure using source: %r",
                     self._sample)


 def args_has_dataframe(parsed):
     """Tell whether the parsed namespace carries a usable data_framework value."""
     return parsed.data_framework not in (None, NotSet)
Example #2
0
    def __init__(self, **kw):
        """
        Set up the instance from keyword arguments.

        Keywords read through ``retrieve_kw`` (with their defaults):
          - inputFiles [NotSet]: passed through ``csvStr2List`` and then
            ``expandFolders`` to build the stored file list;
          - outputFile ["histos.root"]: stored as ``_ofile``;
          - treePath [NotSet]: stored as ``_treePath``;
          - dataframe [DataframeEnum.SkimmedNtuple]: stored as ``_dataframe``;
          - nov [-1]: stored as ``_nov``.

        ``self._fatal`` is called with ``ImportError`` when the TuningTools
        library cannot be loaded into ROOT.
        """
        Logger.__init__(self, kw)

        # Gather the user configuration
        inputFiles = retrieve_kw(kw, 'inputFiles', NotSet)
        self._ofile = retrieve_kw(kw, 'outputFile', "histos.root")
        self._treePath = retrieve_kw(kw, 'treePath', NotSet)
        self._dataframe = retrieve_kw(kw, 'dataframe',
                                      DataframeEnum.SkimmedNtuple)
        self._nov = retrieve_kw(kw, 'nov', -1)
        # Split the input into individual paths, then expand any folder
        self._fList = expandFolders(csvStr2List(inputFiles))

        # The compiled library is required; a negative status is a failure
        loadStatus = ROOT.gSystem.Load('libTuningTools')
        if loadStatus < 0:
            self._fatal("Could not load TuningTools library", ImportError)

        # Services start out empty / unset
        self._containersSvc = {}
        self._storegateSvc = NotSet

        # Draw a random identifier for this instance, seeded from the clock
        import random
        from time import time
        random.seed(time())
        self._id = random.randrange(100000)
Example #3
0
  def __call__( self, fList, ringerOperation, **kw):
    """
      Read ntuple and return patterns and efficiencies.
      Arguments:
        - fList: The file path or file list path. It can be an argument list of
        two types:
          o List: each element is a string path to the file;
          o Comma separated string: each path is separated via a comma
          o Folders: Expand folders recursively adding also files within them to analysis
        - ringerOperation: Set Operation type. It can be both a string or the
          RingerOperation
      Optional arguments:
        - filterType [FilterType.DoNotFilter]: whether to filter. Use FilterType enumeration
        - reference [Truth]: set reference for targets. Use Reference enumeration
        - treePath [Set using operation]: set tree name on file, this may be set to
          use different sources then the default.
            Default for:
              o Offline: Offline/Egamma/Ntuple/electron
              o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH
        - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger
        - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger
        - efEtCut [None]: Set EF calorimeter energy cut value if operating on the trigger
        - offEtCut [None]: Set Offline cluster energy cut value
          (all cuts are multiplied by 1000 before use, i.e. they are given in GeV)
        - nClusters [None]: Read up to nClusters. Use None to run for all clusters.
        - getRates [True]: Whether to fill the benchmark efficiency collectors.
        - getRatesOnly [False]: Only fill the efficiency collectors, skipping the
          pattern extraction. Forces getRates to True.
        - etBins [None]: E_T bins (GeV) where the data should be segmented
        - etaBins [None]: eta bins where the data should be segmented
        - ringConfig [100]: A list containing the number of rings available in the data
          for each eta bin.
        - crossVal [None]: Whether to measure benchmark efficiency splitting it
          by the crossVal-validation datasets
        - extractDet [None]: Which detector to export (use Detector enumeration).
          Defaults are:
            o L2Calo: Calorimetry
            o L2: Tracking
            o Offline: Calorimetry
            o Others: CaloAndTrack
        - standardCaloVariables [False]: Whether to extract the standard calorimeter
          variables instead of the rings.
        - useTRT [False]: Whether to export TRT information when dumping track
          variables.
        - supportTriggers [True]: Whether reading data comes from support triggers
        - monitoring [None]: When not None, it is handed to __fillHistograms
          before and after the event selection.
        - pileupRef [NotSet]: Pile-up reference (PileupReference enumeration). When
          not set, avgmu is used for trigger operations and nvtx otherwise.
        - level: When present, it is assigned to self.level.
      Returns:
        A list [npObject, npBaseInfo, branchEffCollectors, branchCrossEffCollectors].
        The collectors are empty ordered dictionaries when getRates is False.
    """
    # Offline information branches:
    __offlineBranches = ['el_et',
                         'el_eta',
                         #'el_loose',
                         #'el_medium',
                         #'el_tight',
                         'el_lhLoose',
                         'el_lhMedium',
                         'el_lhTight',
                         'mc_hasMC',
                         'mc_isElectron',
                         'mc_hasZMother',
                         'el_nPileupPrimaryVtx',
                         ]
    # Online information branches
    __onlineBranches = []
    __l2stdCaloBranches = ['trig_L2_calo_et',
                           'trig_L2_calo_eta',
                           'trig_L2_calo_phi',
                           'trig_L2_calo_e237', # rEta
                           'trig_L2_calo_e277', # rEta
                           'trig_L2_calo_fracs1', # F1: fraction sample 1
                           'trig_L2_calo_weta2', # weta2
                           'trig_L2_calo_ehad1', # energy on hadronic sample 1
                           'trig_L2_calo_emaxs1', # eratio
                           'trig_L2_calo_e2tsts1', # eratio
                           'trig_L2_calo_wstot',] # wstot
    __l2trackBranches = [ # Do not add non pattern variables on this branch list
                         #'trig_L2_el_pt',
                         #'trig_L2_el_eta',
                         #'trig_L2_el_phi',
                         #'trig_L2_el_caloEta',
                         #'trig_L2_el_charge',
                         #'trig_L2_el_nTRTHits',
                         #'trig_L2_el_nTRTHiThresholdHits',
                         'trig_L2_el_etOverPt',
                         'trig_L2_el_trkClusDeta',
                         'trig_L2_el_trkClusDphi',]
    # Retrieve information from keyword arguments
    filterType            = retrieve_kw(kw, 'filterType',            FilterType.DoNotFilter )
    reference             = retrieve_kw(kw, 'reference',             Reference.Truth        )
    l1EmClusCut           = retrieve_kw(kw, 'l1EmClusCut',           None                   )
    l2EtCut               = retrieve_kw(kw, 'l2EtCut',               None                   )
    efEtCut               = retrieve_kw(kw, 'efEtCut',               None                   )
    offEtCut              = retrieve_kw(kw, 'offEtCut',              None                   )
    treePath              = retrieve_kw(kw, 'treePath',              None                   )
    nClusters             = retrieve_kw(kw, 'nClusters',             None                   )
    getRates              = retrieve_kw(kw, 'getRates',              True                   )
    getRatesOnly          = retrieve_kw(kw, 'getRatesOnly',          False                  )
    etBins                = retrieve_kw(kw, 'etBins',                None                   )
    etaBins               = retrieve_kw(kw, 'etaBins',               None                   )
    crossVal              = retrieve_kw(kw, 'crossVal',              None                   )
    ringConfig            = retrieve_kw(kw, 'ringConfig',            100                    )
    extractDet            = retrieve_kw(kw, 'extractDet',            None                   )
    standardCaloVariables = retrieve_kw(kw, 'standardCaloVariables', False                  )
    useTRT                = retrieve_kw(kw, 'useTRT',                False                  )
    supportTriggers       = retrieve_kw(kw, 'supportTriggers',       True                   )
    monitoring            = retrieve_kw(kw, 'monitoring',            None                   )
    pileupRef             = retrieve_kw(kw, 'pileupRef',             NotSet                 )
    import ROOT, pkgutil
    #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
    #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
    # The library may be available under either of two names; fail only when
    # neither of them can be found and loaded.
    if not( bool( pkgutil.find_loader( 'libTuningTools' ) ) and ROOT.gSystem.Load('libTuningTools') >= 0 ) and \
       not( bool( pkgutil.find_loader( 'libTuningToolsLib' ) ) and ROOT.gSystem.Load('libTuningToolsLib') >= 0 ):
        #ROOT.gSystem.Load('libTuningToolsPythonLib') < 0:
      self._fatal("Could not load TuningTools library", ImportError)

    if 'level' in kw: self.level = kw.pop('level')
    # and delete it to avoid mistakes:
    checkForUnusedVars( kw, self._warning )
    del kw
    ### Parse arguments
    # Mutual exclusive arguments:
    if not getRates and getRatesOnly:
      self._logger.error("Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True.")
      getRates = True
    # Also parse operation, check if its type is string and if we can
    # transform it to the known operation enum:
    fList = csvStr2List ( fList )
    fList = expandFolders( fList )
    ringerOperation = RingerOperation.retrieve(ringerOperation)
    reference = Reference.retrieve(reference)
    if isinstance(l1EmClusCut, str):
      l1EmClusCut = float(l1EmClusCut)
    if l1EmClusCut:
      l1EmClusCut = 1000.*l1EmClusCut # Put energy in MeV
      __onlineBranches.append( 'trig_L1_emClus'  )
    if l2EtCut:
      l2EtCut = 1000.*l2EtCut # Put energy in MeV
      __onlineBranches.append( 'trig_L2_calo_et' )
    if efEtCut:
      efEtCut = 1000.*efEtCut # Put energy in MeV
      __onlineBranches.append( 'trig_EF_calo_et' )
    if offEtCut:
      offEtCut = 1000.*offEtCut # Put energy in MeV
      __offlineBranches.append( 'el_et' )
    if not supportTriggers:
      __onlineBranches.append( 'trig_L1_accept' )
    # Check if treePath is None and try to set it automatically
    # (negative operation values are treated as offline throughout this method)
    if treePath is None:
      treePath = 'Offline/Egamma/Ntuple/electron' if ringerOperation < 0 else \
                 'Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH'
    # Check whether using bins
    useBins=False; useEtBins=False; useEtaBins=False
    nEtaBins = 1; nEtBins = 1
    # Set the detector which we should extract the information:
    if extractDet is None:
      if ringerOperation < 0:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2Calo:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2:
        extractDet = Detector.Tracking
      else:
        extractDet = Detector.CaloAndTrack
    else:
      extractDet = Detector.retrieve( extractDet )

    if etaBins is None: etaBins = npCurrent.fp_array([])
    if type(etaBins) is list: etaBins=npCurrent.fp_array(etaBins)
    if etBins is None: etBins = npCurrent.fp_array([])
    if type(etBins) is list: etBins=npCurrent.fp_array(etBins)

    if etBins.size:
      etBins = etBins * 1000. # Put energy in MeV
      nEtBins  = len(etBins)-1
      if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of et bins (%d) is larger or equal than maximum '
            'integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      # Flag that we are separating data through bins
      useBins=True
      useEtBins=True
      self._debug('E_T bins enabled.')

    if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
      # A scalar is replicated once per eta bin, or once when there are no eta
      # bins. The parentheses matter: without them the conditional applied to
      # the whole product and a plain integer was produced for the unbinned
      # case, breaking the len() check below.
      ringConfig = [ringConfig] * ((len(etaBins) - 1) if etaBins.size else 1)
    if type(ringConfig) is list: ringConfig=npCurrent.int_array(ringConfig)
    if not len(ringConfig):
      self._fatal('Rings size must be specified.')

    if etaBins.size:
      nEtaBins = len(etaBins)-1
      if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of eta bins (%d) is larger or equal than maximum '
            'integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtaBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      if len(ringConfig) != nEtaBins:
        self._fatal(('The number of rings configurations (%r) must be equal than '
                            'eta bins (%r) region config'),ringConfig, etaBins)
      useBins=True
      useEtaBins=True
      self._debug('eta bins enabled.')
    else:
      self._debug('eta/et bins disabled.')

    ### Prepare to loop:
    # Open root file
    t = ROOT.TChain(treePath)
    for inputFile in progressbar(fList, len(fList),
                                 logger = self._logger,
                                 prefix = "Creating collection tree "):

      # Check if file exists
      f  = ROOT.TFile.Open(inputFile, 'read')
      if not f or f.IsZombie():
        self._warning("Couldn't open file: %s", inputFile)
        continue
      # Inform user whether TTree exists, and which options are available:
      self._debug("Adding file: %s", inputFile)
      obj = f.Get(treePath)
      if not obj:
        self._warning("Couldn't retrieve TTree (%s)!", treePath)
        self._info("File available info:")
        f.ReadAll()
        f.ReadKeys()
        f.ls()
        continue
      elif not isinstance(obj, ROOT.TTree):
        self._fatal("%s is not an instance of TTree!", treePath, ValueError)
      t.Add( inputFile )

    # Turn all branches off.
    t.SetBranchStatus("*", False)

    # RingerPhysVal hold the address of required branches
    event = ROOT.RingerPhysVal()

    # Position of the next accepted entry on the output arrays
    cPos = 0
    # Add offline branches, these are always needed
    for var in __offlineBranches:
      self.__setBranchAddress(t,var,event)

    # Add online branches if using Trigger
    if ringerOperation > 0:
      for var in __onlineBranches:
        self.__setBranchAddress(t,var,event)


    ## Allocating memory for the number of entries
    entries = t.GetEntries()
    nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                else nClusters

    ## Retrieve the dependent operation variables:
    # The branch names are needed further below for the base information even
    # when the data is not binned, so they are always defined. Likewise the
    # bin index holders are always bound, since they are handed to
    # treatNpInfo regardless of the binning flags.
    etBranch  = 'el_et' if ringerOperation < 0 else 'trig_L2_calo_et'
    etaBranch = "el_eta" if ringerOperation < 0 else "trig_L2_calo_eta"
    npEt  = None
    npEta = None

    if useEtBins:
      self.__setBranchAddress(t,etBranch,event)
      self._debug("Added branch: %s", etBranch)
      if not getRatesOnly:
        npEt    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEt    with size %r", npEt.shape)

    if useEtaBins:
      self.__setBranchAddress(t,etaBranch,event)
      self._debug("Added branch: %s", etaBranch)
      if not getRatesOnly:
        npEta    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEta   with size %r", npEta.shape)

    # The base information holder, such as et, eta and pile-up
    if pileupRef is NotSet:
      if ringerOperation > 0:
        pileupRef = PileupReference.avgmu
      else:
        pileupRef = PileupReference.nvtx

    pileupRef = PileupReference.retrieve( pileupRef )

    self._info("Using '%s' as pile-up reference.", PileupReference.tostring( pileupRef ) )

    if pileupRef is PileupReference.nvtx:
      pileupBranch = 'el_nPileupPrimaryVtx'
      pileupDataType = np.uint16
    elif pileupRef is PileupReference.avgmu:
      pileupBranch = 'avgmu'
      pileupDataType = np.float32
    else:
      raise NotImplementedError("Pile-up reference %r is not implemented." % pileupRef)
    # NOTE(review): the pile-up branch is the third entry but pileupDataType is
    # the fourth dtype, so the dtypes of pile-up and phi look swapped. Confirm
    # against BaseInfo before changing.
    baseInfoBranch = BaseInfo((etBranch, etaBranch,  pileupBranch, 'el_phi' if ringerOperation < 0 else 'trig_L2_el_phi',),
                              (npCurrent.fp_dtype, npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType) )
    baseInfo = [None, ] * baseInfoBranch.nInfo

    # Make sure all baseInfoBranch information is available:
    for idx in baseInfoBranch:
      self.__setBranchAddress(t,baseInfoBranch.retrieveBranch(idx),event)

    # Allocate numpy to hold as many entries as possible:
    if not getRatesOnly:
      # Retrieve the rings information depending on ringer operation
      ringerBranch = "el_ringsE" if ringerOperation < 0 else \
                     "trig_L2_calo_rings"
      self.__setBranchAddress(t,ringerBranch,event)
      if ringerOperation > 0:
        if ringerOperation is RingerOperation.L2:
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
      if standardCaloVariables:
        if ringerOperation in (RingerOperation.L2, RingerOperation.L2Calo,):
          for var in __l2stdCaloBranches:
            self.__setBranchAddress(t, var, event)
        else:
          self._warning("Unknown standard calorimeters for Operation:%s. Setting operation back to use rings variables.",
                               RingerOperation.tostring(ringerOperation))
      t.GetEntry(0)
      # Count how many pattern rows are needed for the requested detectors
      npat = 0
      if extractDet in (Detector.Calorimetry,
                        Detector.CaloAndTrack,
                        Detector.All):
        if standardCaloVariables:
          npat+= 6
        else:
          npat += ringConfig.max()
      if extractDet in (Detector.Tracking,
                       Detector.CaloAndTrack,
                       Detector.All):
        if ringerOperation is RingerOperation.L2:
          if useTRT:
            self._info("Using TRT information!")
            npat += 2
            __l2trackBranches.append('trig_L2_el_nTRTHits')
            __l2trackBranches.append('trig_L2_el_nTRTHiThresholdHits')
          npat += 3
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
          self.__setBranchAddress(t,"trig_L2_el_pt",event)
        elif ringerOperation < 0: # Offline
          self._warning("Still need to implement tracking for the ringer offline.")
      npPatterns = npCurrent.fp_zeros( shape=npCurrent.shape(npat=npat, #getattr(event, ringerBranch).size()
                                                   nobs=nobs)
                                     )
      self._debug("Allocated npPatterns with size %r", npPatterns.shape)

      # Add E_T, eta and luminosity information
      npBaseInfo = [npCurrent.zeros( shape=npCurrent.shape(npat=1, nobs=nobs ), dtype=baseInfoBranch.dtype(idx) )
                                    for idx in baseInfoBranch]
    else:
      npPatterns = npCurrent.fp_array([])
      npBaseInfo = [deepcopy(npCurrent.fp_array([])) for _ in baseInfoBranch]

    ## Allocate the branch efficiency collectors:
    # Always bound, so that the return statement works when getRates is False.
    branchEffCollectors = OrderedDict()
    branchCrossEffCollectors = OrderedDict()
    if getRates:
      if ringerOperation < 0:
        benchmarkDict = OrderedDict(
          [(  RingerOperation.Offline_CutBased_Loose  , 'el_loose'            ),
           (  RingerOperation.Offline_CutBased_Medium , 'el_medium'           ),
           (  RingerOperation.Offline_CutBased_Tight  , 'el_tight'            ),
           (  RingerOperation.Offline_LH_Loose        , 'el_lhLoose'          ),
           (  RingerOperation.Offline_LH_Medium       , 'el_lhMedium'         ),
           (  RingerOperation.Offline_LH_Tight        , 'el_lhTight'          ),
          ])
      else:
        benchmarkDict = OrderedDict(
          [( RingerOperation.L2Calo                  , 'trig_L2_calo_accept' ),
           ( RingerOperation.L2                      , 'trig_L2_el_accept'   ),
           ( RingerOperation.EFCalo                  , 'trig_EF_calo_accept' ),
           ( RingerOperation.HLT                     , 'trig_EF_el_accept'   ),
          ])


      from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
      # NOTE(review): when bins are disabled no inner list is appended below, yet
      # the collectors are still appended to [key][etBin], and the event loop
      # calls update() directly on the per-key value. The unbinned layout looks
      # inconsistent; confirm the intended structure before relying on it.
      for key, val in benchmarkDict.iteritems():
        branchEffCollectors[key] = list()
        branchCrossEffCollectors[key] = list()
        # Add efficincy branch:
        if getRates or getRatesOnly:
          self.__setBranchAddress(t,val,event)
        for etBin in range(nEtBins):
          if useBins:
            branchEffCollectors[key].append(list())
            branchCrossEffCollectors[key].append(list())
          for etaBin in range(nEtaBins):
            etBinArg = etBin if useBins else -1
            etaBinArg = etaBin if useBins else -1
            argList = [ RingerOperation.tostring(key), val, etBinArg, etaBinArg ]
            branchEffCollectors[key][etBin].append(BranchEffCollector( *argList ) )
            if crossVal:
              branchCrossEffCollectors[key][etBin].append(BranchCrossEffCollector( entries, crossVal, *argList ) )
          # etBin
        # etaBin
      # benchmark dict
      if self._logger.isEnabledFor( LoggingLevel.DEBUG ):
        self._debug( 'Retrieved following branch efficiency collectors: %r',
            [collector[0].printName for collector in traverse(branchEffCollectors.values())])
    # end of (getRates)

    etaBin = 0; etBin = 0
    step = int(entries/100) if int(entries/100) > 0 else 1
    ## Start loop!
    self._info("There is available a total of %d entries.", entries)

    for entry in progressbar(range(entries), entries,
                             step = step, logger = self._logger,
                             prefix = "Looping over entries "):

      #self._verbose('Processing eventNumber: %d/%d', entry, entries)
      t.GetEntry(entry)

      # Check if it is needed to remove energy regions (this means that if not
      # within this range, it will be ignored as well for efficiency measuremnet)
      # A cut left as None never rejects anything; this is stated explicitly
      # instead of relying on how None orders against numbers.
      if offEtCut is not None and event.el_et < offEtCut:
        self._verbose("Ignoring entry due to offline E_T cut.")
        continue
      # Add et distribution for all events

      if not monitoring is None:
        # Book all distribtions before the event selection
        self.__fillHistograms(monitoring,filterType,event,False)

      if ringerOperation > 0:
        # Remove events which didn't pass L1_calo
        if not supportTriggers and not event.trig_L1_accept:
          #self._verbose("Ignoring entry due to L1Calo cut (trig_L1_accept = %r).", event.trig_L1_accept)
          continue
        if l1EmClusCut is not None and event.trig_L1_emClus  < l1EmClusCut:
          #self._verbose("Ignoring entry due to L1Calo E_T cut (%d < %r).", event.trig_L1_emClus, l1EmClusCut)
          continue
        if l2EtCut is not None and event.trig_L2_calo_et < l2EtCut:
          #self._verbose("Ignoring entry due to L2Calo E_T cut.")
          continue
        if  efEtCut is not None and event.trig_L2_calo_accept :
          # EF calo is a container: the entry is dropped as soon as one of its
          # objects is below the cut.
          # NOTE(review): the previous comment spoke of searching for objects
          # with et > cut; confirm that rejecting on any object below the cut
          # is what is wanted.
          trig_EF_calo_et_list = stdvector_to_list(event.trig_EF_calo_et)
          if any(v < efEtCut for v in trig_EF_calo_et_list):
            #self._verbose("Ignoring entry due to EFCalo E_T cut.")
            continue

      # Set discriminator target:
      target = Target.Unknown
      if reference is Reference.Truth:
        if event.mc_isElectron and event.mc_hasZMother:
          target = Target.Signal
        elif not (event.mc_isElectron and (event.mc_hasZMother or event.mc_hasWMother) ):
          target = Target.Background
      elif reference is Reference.Off_Likelihood:
        if event.el_lhTight: target = Target.Signal
        elif not event.el_lhLoose: target = Target.Background
      elif reference is Reference.AcceptAll:
        target = Target.Signal if filterType is FilterType.Signal else Target.Background
      else:
        if event.el_tight: target = Target.Signal
        elif not event.el_loose: target = Target.Background

      # Run filter if it is defined
      if filterType and \
         ( (filterType is FilterType.Signal and target != Target.Signal) or \
           (filterType is FilterType.Background and target != Target.Background) or \
           (target == Target.Unknown) ):
        #self._verbose("Ignoring entry due to filter cut.")
        continue

      # Add et distribution for all events
      if not monitoring is None:
        # Book all distributions after the event selection
        self.__fillHistograms(monitoring,filterType,event,True)

      # Retrieve base information:
      for idx in baseInfoBranch:
        lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
        baseInfo[idx] = lInfo
        if not getRatesOnly: npBaseInfo[idx][cPos] = lInfo
      # Retrieve dependent operation region
      if useEtBins:
        etBin  = self.__retrieveBinIdx( etBins, baseInfo[0] )
      if useEtaBins:
        etaBin = self.__retrieveBinIdx( etaBins, np.fabs( baseInfo[1]) )


      # Check if bin is within range (when not using bins, this will always be true):
      if (etBin < nEtBins and etaBin < nEtaBins):
        # Retrieve patterns:
        if not getRatesOnly:
          if useEtBins:  npEt[cPos] = etBin
          if useEtaBins: npEta[cPos] = etaBin
          ## Retrieve calorimeter information:
          cPat = 0
          caloAvailable = True
          if extractDet in (Detector.Calorimetry,
                           Detector.CaloAndTrack,
                           Detector.All):
            if standardCaloVariables:
              patterns = []
              if ringerOperation is RingerOperation.L2Calo:
                from math import cosh
                cosh_eta = cosh( event.trig_L2_calo_eta )
                # second layer ratio between 3x7 7x7
                rEta = event.trig_L2_calo_e237 / event.trig_L2_calo_e277
                base = event.trig_L2_calo_emaxs1 + event.trig_L2_calo_e2tsts1
                # Ratio between first and second highest energy cells
                eRatio = ( event.trig_L2_calo_emaxs1 - event.trig_L2_calo_e2tsts1 ) / base if base > 0 else 0
                # ratio of energy in the first layer (hadronic particles should leave low energy)
                F1 = event.trig_L2_calo_fracs1 / ( event.trig_L2_calo_et * cosh_eta )
                # weta2 is calculated over the middle layer using 3 x 5
                weta2 = event.trig_L2_calo_weta2
                # wstot is calculated over the first layer using (typically) 20 strips
                wstot = event.trig_L2_calo_wstot
                # ratio between EM cluster and first hadronic layers:
                Rhad1 = ( event.trig_L2_calo_ehad1 / cosh_eta ) / event.trig_L2_calo_et
                # allocate patterns:
                patterns = [rEta, eRatio, F1, weta2, wstot, Rhad1]
                for pat in patterns:
                  npPatterns[npCurrent.access( pidx=cPat, oidx=cPos) ] = pat
                  cPat += 1
              # end of ringerOperation
            else:
              # Remove events without rings
              if getattr(event,ringerBranch).empty():
                caloAvailable = False
              # Retrieve rings:
              if caloAvailable:
                try:
                  patterns = stdvector_to_list( getattr(event,ringerBranch) )
                  lPat = len(patterns)
                  if lPat == ringConfig[etaBin]:
                    npPatterns[npCurrent.access(pidx=slice(cPat,ringConfig[etaBin]),oidx=cPos)] = patterns
                  else:
                    # The number of rings does not match this eta bin: try the
                    # neighbouring bins before giving up.
                    oldEtaBin = etaBin
                    if etaBin > 0 and ringConfig[etaBin - 1] == lPat:
                      etaBin -= 1
                    elif etaBin + 1 < len(ringConfig) and ringConfig[etaBin + 1] == lPat:
                      etaBin += 1
                    npPatterns[npCurrent.access(pidx=slice(cPat, ringConfig[etaBin]),oidx=cPos)] = patterns
                    self._warning(("Recovered event which should be within eta bin (%d: %r) "
                                          "but was found to be within eta bin (%d: %r). "
                                          "Its read eta value was of %f."),
                                          oldEtaBin, etaBins[oldEtaBin:oldEtaBin+2],
                                          etaBin, etaBins[etaBin:etaBin+2],
                                          np.fabs( getattr(event,etaBranch)))
                except ValueError:
                  self._logger.error(("Patterns size (%d) do not match expected "
                                    "value (%d). This event eta value is: %f, and ringConfig is %r."),
                                    lPat, ringConfig[etaBin], np.fabs( getattr(event,etaBranch)), ringConfig
                                    )
                  continue
              else:
                # Entries without rings are dropped whichever detector was
                # requested (both former branches did exactly this).
                self._warning("Rings not available")
                continue
              cPat += ringConfig.max()
            # which calo variables
          # end of (extractDet needed calorimeter)
          # And track information:
          if extractDet in (Detector.Tracking,
                           Detector.CaloAndTrack,
                           Detector.All):
            if caloAvailable or extractDet is Detector.Tracking:
              if ringerOperation is RingerOperation.L2:
                # Retrieve nearest deta/dphi only, so we need to find each one is the nearest:
                if event.trig_L2_el_trkClusDeta.size():
                  clusDeta = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDeta ) )
                  clusDphi = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDphi ) )
                  bestTrackPos = int( np.argmin( clusDeta**2 + clusDphi**2 ) )
                  for var in __l2trackBranches:
                    npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = getattr(event, var)[bestTrackPos]
                    cPat += 1
                else:
                  #self._verbose("Ignoring entry due to track information not available.")
                  continue
                  #for var in __l2trackBranches:
                  #  npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = np.nan
                  #  cPat += 1
              elif ringerOperation < 0: # Offline
                pass
            # caloAvailable or only tracking
          # end of (extractDet needs tracking)
        # end of (getRatesOnly)

        ## Retrieve rates information:
        if getRates:
          for branch in branchEffCollectors.itervalues():
            if not useBins:
              branch.update(event)
            else:
              branch[etBin][etaBin].update(event)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.update(event)
              else:
                branchCross[etBin][etaBin].update(event)
        # end of (getRates)

        # We only increment if this cluster will be computed
        cPos += 1
      # end of (et/eta bins)

      # Limit the number of entries to nClusters if desired and possible:
      if not nClusters is None and cPos >= nClusters:
        break
    # for end

    ## Treat the rings information
    if not getRatesOnly:

      ## Remove not filled reserved memory space:
      if npPatterns.shape[npCurrent.odim] > cPos:
        npPatterns = np.delete( npPatterns, slice(cPos,None), axis = npCurrent.odim)

      ## Segment data over bins regions:
      # Also remove not filled reserved memory space:
      if useEtBins:
        npEt  = npCurrent.delete( npEt, slice(cPos,None))
      if useEtaBins:
        npEta = npCurrent.delete( npEta, slice(cPos,None))
      # Treat
      npObject = self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                  nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                  npPatterns, )
      data = [self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                                      nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                                      npData) for npData in npBaseInfo]
      # The builtin object is what the former np.object alias pointed to.
      npBaseInfo = npCurrent.array( data, dtype=object )
    else:
      npObject = npCurrent.array([], dtype=npCurrent.dtype)
    # not getRatesOnly

    if getRates:
      if crossVal:
        for etBin in range(nEtBins):
          for etaBin in range(nEtaBins):
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.finished()
              else:
                branchCross[etBin][etaBin].finished()

      # Print efficiency for each one for the efficiency branches analysed:
      for etBin in range(nEtBins) if useBins else range(1):
        for etaBin in range(nEtaBins) if useBins else range(1):
          for branch in branchEffCollectors.itervalues():
            lBranch = branch if not useBins else branch[etBin][etaBin]
            self._info('%s',lBranch)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              lBranchCross = branchCross if not useBins else branchCross[etBin][etaBin]
              lBranchCross.dump(self._debug, printSort = True,
                                 sortFcn = self._verbose)
          # for branch
        # for eta
      # for et
    # end of (getRates)

    # All four outputs are always returned, whatever was requested.
    outputs = [npObject, npBaseInfo, branchEffCollectors, branchCrossEffCollectors]
    return outputs
Example #4
0
parser.add_argument('-d',
                    '--data',
                    action='store',
                    dest='data',
                    required=True,
                    nargs='+',
                    help="The input tuning files.")

import sys, os
# Calling the script with no argument at all shows the help text and exits
# with an error status.
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)
args = parser.parse_args()

# Take all files
paths = csvStr2List(args.data)
paths = expandFolders(paths)

from RingerCore import load, save, appendToFileName
# Re-save every input file under a name built from the name of the folder
# that holds it plus the et/eta bin indexes stored inside the file.
for f in paths:
    ff = load(f)
    # Start from scratch for each file, otherwise a file without any 'SP'
    # entry would be saved using the bins of the previous file.
    etBin = None
    etaBin = None
    for k in ff.keys():
        if 'SP' in k:
            etBin = ff[k]['etBinIdx']
            etaBin = ff[k]['etaBinIdx']
    if etBin is None or etaBin is None:
        # Without the bin indexes there is no way to name the output.
        sys.stderr.write("Skipping %s: no 'SP' entry holding the bin indexes was found.\n" % f)
        continue
    # Single formatted string: same text as before, valid with or without
    # the print function.
    print('etBin =  %s , etaBin =  %s' % (etBin, etaBin))
    # The output is named after the folder containing the file
    outname = f.split('/')[-2]
    cOutputName = appendToFileName(outname, ('et%d_eta%d') % (etBin, etaBin))
    save(ff, cOutputName, compress=True)
Example #5
0
    def __call__(self, fList, ringerOperation, **kw):
        """
        Read a skimmed ntuple and return its patterns and efficiencies.

        Arguments:
          - fList: The file path or file list path. It can be an argument list
            of the following types:
              o List: each element is a string path to the file;
              o Comma separated string: each path is separated via a comma
              o Folders: Expand folders recursively adding also files within
                them to analysis
          - ringerOperation: Set Operation type. It can be both a string or the
            RingerOperation. Negative values read the offline ('elCand')
            branches and positive values the online ('fcCand') ones.

        Optional arguments:
          - filterType [FilterType.DoNotFilter]: whether to filter. Use
            FilterType enumeration
          - reference [Reference.AcceptAll]: set reference for targets. Use
            Reference enumeration
          - treePath ['ZeeCandidate']: name of the tree to read on the files
          - l2EtCut [None]: cut applied on the fast calo E_T when
            ringerOperation is positive. It is compared to the branch value as
            given (no unit conversion is applied).
          - offEtCut [None]: Offline E_T cut value in GeV
          - nClusters [None]: Read up to nClusters. Use None to run for all
            clusters.
          - getRates [True]: whether to compute the efficiency collectors and
            add them to the output
          - getRatesOnly [False]: requires getRates; getRates is forced to True
            when this is set
          - getTagsOnly [False]: read the tags (candidate 1) instead of the
            probes (candidate 2)
          - etBins [None]: E_T bins (GeV) where the data should be segmented
          - etaBins [None]: eta bins where the data should be segmented
          - ringConfig [100]: A list containing the number of rings available
            in the data for each eta bin. A scalar is replicated for every eta
            bin.
          - crossVal [None]: Whether to measure benchmark efficiency splitting
            it by the crossVal-validation datasets
          - monitoring [None]: when set, it is filled through
            __fillHistograms for every accepted entry
          - pileupRef [NotSet]: pile-up reference (PileupReference enumeration).
            When not set, avgmu is used for positive ringerOperation and nvtx
            otherwise.
          - extractDet [None]: Which detector to export (use Detector
            enumeration). One of Calorimetry, Tracking, CaloAndTrack or All
            must be specified.
          - level: when present, it is assigned to self.level

        Returns:
          A list with the segmented patterns and the segmented base
          information (E_T, eta and pile-up). When getRates is set, the
          efficiency collectors and cross-validation efficiency collectors
          dictionaries are appended to it.
        """

        # Track variables exported as patterns (names are hard-coded to the
        # second candidate).
        trackBranches = [
            'elCand2_deltaeta1', 'elCand2_DeltaPOverP',
            'elCand2_deltaphiRescaled', 'elCand2_d0significance',
            'elCand2_trackd0pvunbiased', 'elCand2_eProbabilityHT'
        ]

        onlineBranches = ['match', 'ringerMatch', 'ringer_rings']

        offlineBranches = ['et', 'eta']

        # The current pid map used as offline reference
        pidConfigs = {
            key: value
            for key, value in RingerOperation.efficiencyBranches().iteritems()
            if key in (RingerOperation.Offline_LH_Tight,
                       RingerOperation.Offline_LH_Medium,
                       RingerOperation.Offline_LH_Loose,
                       RingerOperation.Offline_LH_VeryLoose)
        }

        # Retrieve information from keyword arguments
        filterType = retrieve_kw(kw, 'filterType', FilterType.DoNotFilter)
        reference = retrieve_kw(kw, 'reference', Reference.AcceptAll)
        offEtCut = retrieve_kw(kw, 'offEtCut', None)
        l2EtCut = retrieve_kw(kw, 'l2EtCut', None)
        treePath = retrieve_kw(kw, 'treePath', 'ZeeCandidate')
        nClusters = retrieve_kw(kw, 'nClusters', None)
        etBins = retrieve_kw(kw, 'etBins', None)
        etaBins = retrieve_kw(kw, 'etaBins', None)
        crossVal = retrieve_kw(kw, 'crossVal', None)
        ringConfig = retrieve_kw(kw, 'ringConfig', 100)
        monitoring = retrieve_kw(kw, 'monitoring', None)
        pileupRef = retrieve_kw(kw, 'pileupRef', NotSet)
        getRates = retrieve_kw(kw, 'getRates', True)
        getRatesOnly = retrieve_kw(kw, 'getRatesOnly', False)
        getTagsOnly = retrieve_kw(kw, 'getTagsOnly', False)
        extractDet = retrieve_kw(kw, 'extractDet', None)

        import ROOT
        if ROOT.gSystem.Load('libTuningTools') < 0:
            self._fatal("Could not load TuningTools library", ImportError)

        if 'level' in kw: self.level = kw.pop('level')
        # and delete it to avoid mistakes:
        checkForUnusedVars(kw, self._warning)
        del kw

        ### Parse arguments
        # Also parse operation, check if its type is string and if we can
        # transform it to the known operation enum:
        fList = csvStr2List(fList)
        fList = expandFolders(fList)
        ringerOperation = RingerOperation.retrieve(ringerOperation)
        reference = Reference.retrieve(reference)

        # Offline E_T cut
        if offEtCut:
            offEtCut = 1000. * offEtCut  # Put energy in MeV

        # Check whether using bins
        useBins = False
        useEtBins = False
        useEtaBins = False
        nEtaBins = 1
        nEtBins = 1

        if etaBins is None: etaBins = npCurrent.fp_array([])
        if isinstance(etaBins, list): etaBins = npCurrent.fp_array(etaBins)
        if etBins is None: etBins = npCurrent.fp_array([])
        if isinstance(etBins, list): etBins = npCurrent.fp_array(etBins)

        if etBins.size:
            etBins = etBins * 1000.  # Put energy in MeV
            nEtBins = len(etBins) - 1
            if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of et bins (%d) is larger or equal than maximum '
                    'integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            # Flag that we are separating data through bins
            useBins = True
            useEtBins = True
            self._debug('E_T bins enabled.')

        if not isinstance(ringConfig, (list, np.ndarray)):
            # Replicate the scalar configuration for every eta region. The
            # parentheses matter: without them the conditional expression
            # replaced the whole configuration by the integer 1 when no eta
            # bins were specified.
            nRingRegions = (len(etaBins) - 1) if etaBins.size else 1
            ringConfig = [ringConfig] * nRingRegions
        if isinstance(ringConfig, list):
            ringConfig = npCurrent.int_array(ringConfig)
        if not len(ringConfig):
            self._fatal('Rings size must be specified.')

        if etaBins.size:
            nEtaBins = len(etaBins) - 1
            if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of eta bins (%d) is larger or equal than maximum '
                    'integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtaBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            if len(ringConfig) != nEtaBins:
                self._fatal((
                    'The number of rings configurations (%r) must be equal than '
                    'eta bins (%r) region config'), ringConfig, etaBins)
            useBins = True
            useEtaBins = True
            self._debug('eta bins enabled.')
        else:
            self._debug('eta/et bins disabled.')

        # The base information holder, such as et, eta and pile-up
        if pileupRef is NotSet:
            if ringerOperation > 0:
                pileupRef = PileupReference.avgmu
            else:
                pileupRef = PileupReference.nvtx

        pileupRef = PileupReference.retrieve(pileupRef)
        self._info("Using '%s' as pile-up reference.",
                   PileupReference.tostring(pileupRef))

        # Candidates: (1) is tags and (2) is probes. Default is probes
        self._candIdx = 1 if getTagsOnly else 2

        # Mutual exclusive arguments:
        if not getRates and getRatesOnly:
            self._logger.error(
                "Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True."
            )
            getRates = True

        ### Prepare to loop:
        t = ROOT.TChain(treePath)
        for inputFile in progressbar(fList,
                                     len(fList),
                                     logger=self._logger,
                                     prefix="Creating collection tree "):
            # Check if file exists
            f = ROOT.TFile.Open(inputFile, 'read')
            if not f or f.IsZombie():
                self._warning("Couldn't open file: %s", inputFile)
                continue
            # Inform user whether TTree exists, and which options are available:
            self._debug("Adding file: %s", inputFile)
            obj = f.Get(treePath)
            if not obj:
                self._warning("Couldn't retrieve TTree (%s)!", treePath)
                self._info("File available info:")
                f.ReadAll()
                f.ReadKeys()
                f.ls()
                continue
            elif not isinstance(obj, ROOT.TTree):
                self._fatal("%s is not an instance of TTree!", treePath,
                            ValueError)
            t.Add(inputFile)
        # Turn all branches off.
        t.SetBranchStatus("*", False)
        # The event object holds the address of the required branches
        event = ROOT.SkimmedNtuple()
        # Ready to retrieve the total number of events
        t.GetEntry(0)
        ## Allocating memory for the number of entries
        entries = t.GetEntries()
        nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                    else nClusters

        ## Retrieve the dependent operation variables:
        # The E_T and eta branches are always needed (base information, cuts
        # and log messages use them), so their names and addresses are set
        # even when the data is not segmented through bins. Only the bin index
        # holders depend on the binning.
        etBranch = ('elCand%d_et' if ringerOperation < 0 else 'fcCand%d_et'
                    ) % (self._candIdx)
        self.__setBranchAddress(t, etBranch, event)
        self._debug("Added branch: %s", etBranch)
        npEt = None
        if useEtBins:
            npEt = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEt    with size %r", npEt.shape)

        etaBranch = ('elCand%d_eta' if ringerOperation < 0 else 'fcCand%d_eta'
                     ) % (self._candIdx)
        self.__setBranchAddress(t, etaBranch, event)
        self._debug("Added branch: %s", etaBranch)
        npEta = None
        if useEtaBins:
            npEta = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEta   with size %r", npEta.shape)

        truthBranch = ('elCand%d_isTruthElectronFromZ') % (self._candIdx)
        if reference is Reference.Truth:
            self.__setBranchAddress(t, truthBranch, event)

        for var in offlineBranches:
            self.__setBranchAddress(t, ('elCand%d_%s') % (self._candIdx, var),
                                    event)

        for var in trackBranches:
            self.__setBranchAddress(t, var, event)

        # Add online branches if using Trigger
        if ringerOperation > 0:
            for var in onlineBranches:
                self.__setBranchAddress(t,
                                        ('fcCand%d_%s') % (self._candIdx, var),
                                        event)
        else:
            self.__setBranchAddress(t, ('elCand%d_%s') %
                                    (self._candIdx, 'ringer_rings'), event)

        if pileupRef is PileupReference.nvtx:
            pileupBranch = 'Nvtx'
            pileupDataType = np.uint16
        elif pileupRef is PileupReference.avgmu:
            pileupBranch = 'averageIntPerXing'
            pileupDataType = np.float32
        else:
            raise NotImplementedError(
                "Pile-up reference %r is not implemented." % pileupRef)

        self.__setBranchAddress(t, pileupBranch, event)

        ### Allocate memory
        if extractDet == (Detector.Calorimetry):
            npat = ringConfig.max()
        elif extractDet == (Detector.Tracking):
            npat = len(trackBranches)
        # NOTE: Check if pat is correct for both Calo and track data
        elif extractDet in (Detector.CaloAndTrack, Detector.All):
            npat = ringConfig.max() + len(trackBranches)
        else:
            # Fail with a clear message instead of an unbound variable error
            # on the allocation below.
            self._fatal(
                "Cannot determine the number of patterns for extractDet = %r. "
                "Please specify one of the Detector enumeration values.",
                extractDet, ValueError)

        npPatterns = npCurrent.fp_zeros(shape=npCurrent.shape(
            npat=npat,
            nobs=nobs))
        self._debug("Allocated npPatterns with size %r", npPatterns.shape)

        baseInfoBranch = BaseInfo(
            (etBranch, etaBranch, pileupBranch),
            (npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType))

        baseInfo = [
            None,
        ] * baseInfoBranch.nInfo
        # Add E_T, eta and luminosity information
        npBaseInfo = [
            npCurrent.zeros(shape=npCurrent.shape(npat=1, nobs=nobs),
                            dtype=baseInfoBranch.dtype(idx))
            for idx in baseInfoBranch
        ]

        from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
        branchEffCollectors = OrderedDict()
        branchCrossEffCollectors = OrderedDict()

        if ringerOperation < 0:
            from operator import itemgetter
            benchmarkDict = OrderedDict(
                sorted([(key, value) for key, value in
                        RingerOperation.efficiencyBranches().iteritems()
                        if key < 0 and not (isinstance(value, (list, tuple)))],
                       key=itemgetter(0)))
        else:
            benchmarkDict = OrderedDict()

        for key, val in benchmarkDict.iteritems():
            branchEffCollectors[key] = list()
            branchCrossEffCollectors[key] = list()
            # Add efficiency branch:
            if ringerOperation < 0:
                self.__setBranchAddress(t, val, event)

            if not useBins:
                # Without bins every consumer below expects the collector
                # itself (not a nested list), so store it directly.
                argList = [RingerOperation.tostring(key), val, -1, -1]
                branchEffCollectors[key] = BranchEffCollector(*argList)
                if crossVal:
                    branchCrossEffCollectors[key] = BranchCrossEffCollector(
                        entries, crossVal, *argList)
                continue

            for etBin in range(nEtBins):
                branchEffCollectors[key].append(list())
                branchCrossEffCollectors[key].append(list())
                for etaBin in range(nEtaBins):
                    argList = [
                        RingerOperation.tostring(key), val, etBin, etaBin
                    ]
                    branchEffCollectors[key][etBin].append(
                        BranchEffCollector(*argList))
                    if crossVal:
                        branchCrossEffCollectors[key][etBin].append(
                            BranchCrossEffCollector(entries, crossVal,
                                                    *argList))
                # etaBin
            # etBin
        # benchmark dict

        if self._logger.isEnabledFor(LoggingLevel.DEBUG):
            self._debug(
                'Retrieved following branch efficiency collectors: %r', [
                    collector[0].printName
                    for collector in traverse(branchEffCollectors.values())
                ])

        etaBin = 0
        etBin = 0
        step = max(int(entries / 100), 1)

        # The E_T cuts must look at the same candidate whose branches were
        # enabled above (tags or probes).
        offEtBranch = ('elCand%d_et') % (self._candIdx)
        l2EtBranch = ('fcCand%d_et') % (self._candIdx)

        # Constants of the TRT electron probability transformation
        from math import log
        epsilon = 1e-99
        tau = 15.0

        ## Start loop!
        self._info("There is available a total of %d entries.", entries)
        cPos = 0

        ### Loop over entries
        for entry in progressbar(range(entries),
                                 entries,
                                 step=step,
                                 logger=self._logger,
                                 prefix="Looping over entries "):

            self._verbose('Processing eventNumber: %d/%d', entry, entries)
            t.GetEntry(entry)

            # The cuts are only applied when they were specified
            if offEtCut is not None:
                offEt = getattr(event, offEtBranch)
                if offEt < offEtCut:
                    self._debug(
                        "Ignoring entry due to offline E_T cut. E_T = %1.3f < %1.3f MeV",
                        offEt, offEtCut)
                    continue

            if ringerOperation > 0 and l2EtCut is not None:
                if getattr(event, l2EtBranch) < l2EtCut:
                    self._debug("Ignoring entry due Fast Calo E_T cut.")
                    continue

            # Set discriminator target:
            target = Target.Unknown
            # Monte Carlo cuts
            if reference is Reference.Truth:
                if getattr(event, truthBranch):
                    target = Target.Signal
                else:
                    target = Target.Background
            # Offline Likelihood cuts
            elif reference is Reference.Off_Likelihood:
                if getattr(event,
                           pidConfigs[RingerOperation.Offline_LH_Tight]):
                    target = Target.Signal
                elif not getattr(
                        event,
                        pidConfigs[RingerOperation.Offline_LH_VeryLoose]):
                    target = Target.Background
            # By pass everything (Default)
            elif reference is Reference.AcceptAll:
                target = Target.Signal if filterType is FilterType.Signal else Target.Background

            # Run filter if it is defined
            if filterType and \
               ( (filterType is FilterType.Signal and target != Target.Signal) or \
                 (filterType is FilterType.Background and target != Target.Background) or \
                 (target == Target.Unknown) ):
                continue

            ## Retrieve base information and rings:
            for idx in baseInfoBranch:
                lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
                baseInfo[idx] = lInfo
                # Store it on the output holder. Rejected entries do not
                # increment cPos, so their values get overwritten by the next
                # entry (and the unused tail is removed after the loop).
                npBaseInfo[idx][npCurrent.access(pidx=0, oidx=cPos)] = lInfo
            # Retrieve dependent operation region
            if useEtBins:
                etBin = self.__retrieveBinIdx(etBins, baseInfo[0])
            if useEtaBins:
                etaBin = self.__retrieveBinIdx(etaBins, np.fabs(baseInfo[1]))

            # Check if bin is within range (when not using bins, this will always be true):
            if (etBin < nEtBins and etaBin < nEtaBins):

                if useEtBins: npEt[cPos] = etBin
                if useEtaBins: npEta[cPos] = etaBin
                # Online operation
                cPat = 0
                caloAvailable = True
                if ringerOperation > 0 and self.__get_ringer_onMatch(
                        event) < 1:
                    continue
                # TODO Treat case where we don't use rings energy
                # Check if the rings empty
                if self.__get_rings_energy(event, ringerOperation).empty():
                    self._debug(
                        'No rings available in this event. Skipping...')
                    caloAvailable = False

                # Retrieve rings:
                if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack,
                                  Detector.All):
                    if caloAvailable:
                        try:
                            patterns = stdvector_to_list(
                                self.__get_rings_energy(
                                    event, ringerOperation))
                            lPat = len(patterns)
                            if lPat == ringConfig[etaBin]:
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                            else:
                                # The number of rings does not match the eta
                                # bin: try the neighbour bins before giving up.
                                # NOTE(review): npEta[cPos] keeps the original
                                # bin after a recovery - confirm whether it
                                # should follow the recovered bin.
                                oldEtaBin = etaBin
                                if etaBin > 0 and ringConfig[etaBin -
                                                             1] == lPat:
                                    etaBin -= 1
                                elif etaBin + 1 < len(
                                        ringConfig) and ringConfig[etaBin +
                                                                   1] == lPat:
                                    etaBin += 1
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                                self._warning((
                                    "Recovered event which should be within eta bin (%d: %r) "
                                    "but was found to be within eta bin (%d: %r). "
                                    "Its read eta value was of %f."),
                                              oldEtaBin,
                                              etaBins[oldEtaBin:oldEtaBin + 2],
                                              etaBin,
                                              etaBins[etaBin:etaBin + 2],
                                              np.fabs(getattr(
                                                  event, etaBranch)))
                        except ValueError:
                            self._logger.error((
                                "Patterns size (%d) do not match expected "
                                "value (%d). This event eta value is: %f, and ringConfig is %r."
                            ), lPat, ringConfig[etaBin],
                                               np.fabs(
                                                   getattr(event, etaBranch)),
                                               ringConfig)
                            continue
                        cPat += ringConfig[etaBin]
                    else:
                        # Also display warning when extracting only calorimetry!
                        self._warning("Rings not available")
                        continue

                if extractDet in (Detector.Tracking, Detector.CaloAndTrack,
                                  Detector.All):
                    for var in trackBranches:
                        npPatterns[npCurrent.access(pidx=cPat,
                                                    oidx=cPos)] = getattr(
                                                        event, var)
                        if var == 'elCand2_eProbabilityHT':
                            # Transform the probability, clamping it away from
                            # 0 and 1 so that the logarithm stays finite.
                            TRT_PID = npPatterns[npCurrent.access(pidx=cPat,
                                                                  oidx=cPos)]
                            if TRT_PID >= 1.0: TRT_PID = 1.0 - 1.e-15
                            elif TRT_PID <= 0.0: TRT_PID = epsilon
                            TRT_PID = -(1 / tau) * log((1.0 / TRT_PID) - 1.0)
                            npPatterns[npCurrent.access(pidx=cPat,
                                                        oidx=cPos)] = TRT_PID
                        cPat += 1

                ## Retrieve rates information:
                if getRates and ringerOperation < 0:
                    # Replace the isEM words by their decision: approved when
                    # none of the masked bits is set.
                    # NOTE(review): these branches are hard-coded to candidate
                    # 2 - confirm the behavior expected when reading tags.
                    event.elCand2_isEMLoose2015 = not (
                        event.elCand2_isEMLoose2015 & 34896)
                    event.elCand2_isEMMedium2015 = not (
                        event.elCand2_isEMMedium2015 & 276858960)
                    event.elCand2_isEMTight2015 = not (
                        event.elCand2_isEMTight2015 & 281053264)

                    for branch in branchEffCollectors.itervalues():
                        if not useBins:
                            branch.update(event)
                        else:
                            branch[etBin][etaBin].update(event)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            if not useBins:
                                branchCross.update(event)
                            else:
                                branchCross[etBin][etaBin].update(event)
                # end of (getRates)

                if not monitoring is None:
                    self.__fillHistograms(monitoring, filterType, pileupRef,
                                          pidConfigs, event)

                # We only increment if this cluster will be computed
                cPos += 1
            # end of (et/eta bins)

            # Limit the number of entries to nClusters if desired and possible:
            if not nClusters is None and cPos >= nClusters:
                break
        # for end

        ## Treat the rings information
        ## Remove not filled reserved memory space:
        if npPatterns.shape[npCurrent.odim] > cPos:
            npPatterns = np.delete(npPatterns,
                                   slice(cPos, None),
                                   axis=npCurrent.odim)

        ## Segment data over bins regions:
        # Also remove not filled reserved memory space:
        if useEtBins:
            npEt = npCurrent.delete(npEt, slice(cPos, None))
        if useEtaBins:
            npEta = npCurrent.delete(npEta, slice(cPos, None))

        # Treat
        standardCaloVariables = False
        npObject = self.treatNpInfo(
            cPos,
            npEt,
            npEta,
            useEtBins,
            useEtaBins,
            nEtBins,
            nEtaBins,
            standardCaloVariables,
            ringConfig,
            npPatterns,
        )

        data = [
            self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins,
                             nEtaBins, standardCaloVariables, ringConfig,
                             npData) for npData in npBaseInfo
        ]
        npBaseInfo = npCurrent.array(data, dtype=object)

        if getRates:
            if crossVal:
                for etBin in range(nEtBins):
                    for etaBin in range(nEtaBins):
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            if not useBins:
                                branchCross.finished()
                            else:
                                branchCross[etBin][etaBin].finished()

            # Print efficiency for each one for the efficiency branches analysed:
            for etBin in range(nEtBins) if useBins else range(1):
                for etaBin in range(nEtaBins) if useBins else range(1):
                    for branch in branchEffCollectors.itervalues():
                        lBranch = branch if not useBins else branch[etBin][
                            etaBin]
                        self._info('%s', lBranch)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            lBranchCross = branchCross if not useBins else branchCross[
                                etBin][etaBin]
                            lBranchCross.dump(self._debug,
                                              printSort=True,
                                              sortFcn=self._verbose)
                    # for branch
                # for eta
            # for et
        else:
            branchEffCollectors = None
            branchCrossEffCollectors = None
        # end of (getRates)

        outputs = []
        outputs.extend((npObject, npBaseInfo))
        if getRates:
            outputs.extend((branchEffCollectors, branchCrossEffCollectors))

        return outputs
Example #6
0


import sys, os
# Running without any argument only shows the usage.
if len(sys.argv)==1:
  mainFilterParser.print_help()
  sys.exit(1)

mainLogger = Logger.getModuleLogger( __name__, LoggingLevel.INFO )
mainLogger.info('Start ntuple extraction...')
# retrieve args
args=mainFilterParser.parse_args()

# Treat special arguments
# A single argument may hold a comma separated list of files
if len( args.inputFiles ) == 1:
  args.inputFiles = csvStr2List( args.inputFiles[0] )

args.inputFiles = expandFolders( args.inputFiles )
mainLogger.verbose("All input files are:")
# Only dump the file list when its header above is also displayed, otherwise
# an unexplained list would be printed at every logging level.
if mainLogger.isEnabledFor( LoggingLevel.VERBOSE ):
  pprint(args.inputFiles)

# A '*' on the output name is a placeholder for the trigger name (without the
# 'HLT_' prefix).
if '*' in args.output:
  output = args.output.replace('*', args.trigger.replace('HLT_',''))
else:
  output = args.output

# Copy the tree to an slim file
obj  = CopyTree( output )
if obj( args.inputFiles, args.basepath, args.trigger, args.treename) :
  obj.save()
else:
Example #7
0
# Build the command line interface from the shared parsers; conflicting
# options are resolved instead of raising.
parser = ArgumentParser(
    description='Save files on matlab format.',
    parents=[mainParser, loggerParser],
    conflict_handler='resolve',
)
parser.make_adjustments()

emptyArgumentsPrintHelp(parser)

## Retrieve parser args:
args = parser.parse_args(namespace=LoggerNamespace())
mainLogger.setLevel(args.output_level)
# Show the raw input files when debugging
if mainLogger.isEnabledFor(LoggingLevel.DEBUG):
    from pprint import pprint
    pprint(args.inputFiles)
## Treat special arguments
# A single argument may hold a comma separated list of files; folders are
# expanded afterwards in both cases.
args.inputFiles = expandFolders(
    csvStr2List(args.inputFiles[0])
    if len(args.inputFiles) == 1 else args.inputFiles)
mainLogger.verbose("All input files are:")
if mainLogger.isEnabledFor(LoggingLevel.VERBOSE):
    pprint(args.inputFiles)

for inFile in progressbar(args.inputFiles,
                          len(args.inputFiles),
                          logger=mainLogger,
                          prefix="Processing files "):
    # Treat output file name:
    from RingerCore import checkExtension, changeExtension, load, save
    if checkExtension(inFile, "tgz|tar.gz|pic"):
        cOutputName = changeExtension(inFile, '.mat')
        if args.change_output_folder:
            import os.path
Example #8
0
mainLogger = Logger.getModuleLogger(__name__)

import sys
# Running without any argument only shows the usage.
if len(sys.argv)==1:
  parser.print_help()
  sys.exit(1)

## Retrieve parser args:
args = parser.parse_args( namespace = LoggerNamespace() )
mainLogger.debug("Raw input files are:")
if mainLogger.isEnabledFor( LoggingLevel.DEBUG ):
  pprint(args.inputFiles)
## Treat special arguments
# A single argument may hold a comma separated list of files
if len( args.inputFiles ) == 1:
  args.inputFiles = csvStr2List( args.inputFiles[0] )
args.inputFiles = expandFolders( args.inputFiles )
mainLogger.verbose("All input files are:")
if mainLogger.isEnabledFor( LoggingLevel.VERBOSE ):
  pprint(args.inputFiles)
if args.binFilters is not NotSet:
  # The filters may be given either as the name of a class available on
  # TuningTools.CrossValidStat or as a comma separated list.
  try:
    args.binFilters = str_to_class( "TuningTools.CrossValidStat", args.binFilters )
    args.binFilters = getFilters( args.binFilters, args.inputFiles, 
                                  printf = mainLogger.info )
  except TypeError:
    args.binFilters = csvStr2List( args.binFilters )
  args.inputFiles = select( args.inputFiles, args.binFilters ) 
  # Compare the length by value: identity comparison against an integer
  # literal only works by accident of the interpreter's small integer cache.
  if len(args.binFilters) == 1:
    args.inputFiles = [args.inputFiles]
else:
Example #9
0
  mkdir_p( args.tmpFolder )
  import tempfile
  tempfile.tempdir = args.tmpFolder

# When DEBUG output is enabled, start a cProfile profiler for the run.
if mainLogger.isEnabledFor(LoggingLevel.DEBUG):
  import cProfile
  import pstats
  import StringIO
  pr = cProfile.Profile()
  pr.enable()

## Treat special arguments
# binFilters may name a class in TuningTools.CrossValidStat; when the lookup
# fails with TypeError or AttributeError it is passed to csvStr2List instead.
if args.binFilters is not NotSet:
  try:
    args.binFilters = str_to_class("TuningTools.CrossValidStat", args.binFilters)
  except (TypeError, AttributeError):
    args.binFilters = csvStr2List(args.binFilters)

# Retrieve reference benchmark:
call_kw = {}
if args.refFile is not None:
  # If user has specified a reference performance file:
  mainLogger.info("Loading reference file...")
  effArchieve = BenchmarkEfficiencyArchieve.load(args.refFile, loadCrossEfficiencies = True)
  refBenchmarkCol = ReferenceBenchmarkCollection([])
  # Fall back to the operation stored in the loaded archive when none was
  # given on the command line.
  if args.operation is None:
    args.operation = effArchieve.operation
  from TuningTools.dataframe import RingerOperation
  # NOTE(review): this refLabel value is overwritten two statements below by
  # getEfficiencyKeyAndLabel -- confirm whether the tostring call is still
  # needed (e.g. for validation) or is a leftover.
  refLabel = RingerOperation.tostring( args.operation )
  from TuningTools import getEfficiencyKeyAndLabel
  efficiencyKey, refLabel = getEfficiencyKeyAndLabel( args.refFile, args.operation )
  from itertools import product
# Parse the command line into a CrossValidStatNamespace and apply the
# requested output level to the module logger.
args = parser.parse_args(namespace=CrossValidStatNamespace())
mainLogger = Logger.getModuleLogger(__name__)
mainLogger.level = args.output_level

# When DEBUG output is enabled, start a cProfile profiler for the run.
if mainLogger.isEnabledFor(LoggingLevel.DEBUG):
  import cProfile
  import pstats
  import StringIO
  pr = cProfile.Profile()
  pr.enable()

## Treat special arguments
# binFilters may name a class in TuningTools.CrossValidStat; when the lookup
# fails with TypeError or AttributeError it is passed to csvStr2List instead.
if args.binFilters is not NotSet:
  try:
    args.binFilters = str_to_class("TuningTools.CrossValidStat", args.binFilters)
  except (TypeError, AttributeError):
    args.binFilters = csvStr2List(args.binFilters)

# Retrieve reference benchmark:
call_kw = {}
if args.refFile is not None:
  # If user has specified a reference performance file:
  TDArchieve = TuningDataArchieve(args.refFile)
  # The bin counts are queried before the archive is entered as a context
  # manager below.
  nEtBins = TDArchieve.nEtBins()
  nEtaBins = TDArchieve.nEtaBins()
  refBenchmarkCol = ReferenceBenchmarkCollection([])
  from itertools import product
  with TDArchieve as data:
    # Fall back to the value stored under the 'operation' key when no
    # operation was given on the command line.
    if args.operation is None:
      args.operation = data['operation']
    from TuningTools.ReadData import RingerOperation
    # Pass the operation through RingerOperation.retrieve (presumably to
    # normalise it to the enumeration value -- confirm).
    args.operation = RingerOperation.retrieve(args.operation)
Example #11
0
                        parents=[mainParser, loggerParser],
                        conflict_handler='resolve')
parser.make_adjustments()

# Presumably prints the help text and exits when the script is called with no
# arguments -- confirm against the helper's definition.
emptyArgumentsPrintHelp(parser)

## Retrieve parser args:
args = parser.parse_args(namespace=LoggerNamespace())

# Module logger created directly at the requested output level.
mainLogger = Logger.getModuleLogger(__name__, args.output_level)
mainLogger.debug("Raw input files are:")
if mainLogger.isEnabledFor(LoggingLevel.DEBUG):
    pprint(args.inputFiles)

## Treat special arguments
# A lone inputFiles entry goes through csvStr2List first (presumably a
# comma-separated string -- confirm); the resulting list is then expanded.
args.inputFiles = expandFolders(
    csvStr2List(args.inputFiles[0]) if len(args.inputFiles) == 1
    else args.inputFiles
)
mainLogger.verbose("All input files are:")
if mainLogger.isEnabledFor(LoggingLevel.VERBOSE):
    pprint(args.inputFiles)

if args.binFilters is not NotSet:
    try:
        # Try binFilters as a class name in TuningTools.CrossValidStat and
        # let getFilters derive the filters from it. Kept as two separate
        # assignments so a failure in the second step still sees the class
        # stored in args.binFilters.
        args.binFilters = str_to_class("TuningTools.CrossValidStat", args.binFilters)
        args.binFilters = getFilters(args.binFilters, args.inputFiles, printf=mainLogger.info)
    except (TypeError, AttributeError):
        # Otherwise pass the current value through csvStr2List.
        args.binFilters = csvStr2List(args.binFilters)
    args.inputFiles = select(args.inputFiles, args.binFilters)
    if len(args.binFilters) is 1:
Example #12
0
                    dest='cores',
                    required=False,
                    default=4,
                    help="The number of cores processor per job")

import os
import sys
# Invoked without any command-line argument: show the usage text and leave
# with a non-zero exit status.
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)
args = parser.parse_args()

# Remember the current thread setting. This lookup raises KeyError when
# OMP_NUM_THREADS is not set in the environment.
defaultCore = os.environ["OMP_NUM_THREADS"]
# Environment values must be strings, but the 'cores' option defaults to the
# integer 4; convert explicitly so the default does not raise TypeError.
os.environ["OMP_NUM_THREADS"] = str(args.cores)

# Take all files: pass the fList argument through csvStr2List, then through
# expandFolders.
fList = expandFolders(csvStr2List(args.fList))

# Run the file list through a GridJobFilter instance.
from TuningTools import GridJobFilter
gridJobFilter = GridJobFilter()
fList = gridJobFilter(fList)

# Two initially empty bookkeeping lists.
process_pipe, output_stack = [], []
import subprocess
from pprint import pprint

while len(fList) > 0:

    if len(process_pipe) < int(args.maxJobs):
        job_id = len(fList)