Example #1
 def __init__(self, **kw):
   LoggerRawDictStreamer.__init__( self, 
       transientAttrs = set() | kw.pop('transientAttrs', set()),
       toPublicAttrs = {'_nSorts','_nBoxes',
         '_nTrain','_nValid',
         '_nTest', '_method','_sort_boxes_list'} | kw.pop('toPublicAttrs', set()), 
       **kw )
Example #2
 def __init__(self, **kw):
     LoggerRawDictStreamer.__init__(
         self,
         transientAttrs=set() | kw.pop('transientAttrs', set()),
         toPublicAttrs={
             '_nSorts', '_nBoxes', '_nTrain', '_nValid', '_nTest',
             '_method', '_sort_boxes_list'
         } | kw.pop('toPublicAttrs', set()),
         #ignoreAttrs = {'_backend',}
         **kw)
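Both examples above follow the same idiom: the subclass merges its own attribute names into whatever the caller passes, using set union together with kw.pop, and forwards the remaining keywords untouched. A minimal self-contained sketch of the idiom (Base, Derived and the attribute names are hypothetical stand-ins for LoggerRawDictStreamer and its subclasses):

class Base(object):
  def __init__(self, transientAttrs=frozenset(), toPublicAttrs=frozenset(), **kw):
    self.transientAttrs = set(transientAttrs)
    self.toPublicAttrs  = set(toPublicAttrs)

class Derived(Base):
  def __init__(self, **kw):
    # Merge this class' defaults with whatever the caller supplied, then
    # forward the remaining keywords to the base class untouched.
    Base.__init__( self,
        transientAttrs = {'_cache'} | kw.pop('transientAttrs', set()),
        toPublicAttrs  = {'_nSorts'} | kw.pop('toPublicAttrs', set()),
        **kw )

d = Derived(toPublicAttrs={'_extra'})
assert d.toPublicAttrs == {'_nSorts', '_extra'}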
Example #3
class Subset(LoggerStreamable):

  # The version only needs to be changed if a property is added
  _streamerObj = LoggerRawDictStreamer(toPublicAttrs = {'_ppChain'})
  _cnvObj      = RawDictCnv(toProtectedAttrs         = {'_ppChain'})

  def __init__(self, d=None, **kw):
    # Avoid the mutable default argument pitfall: build a fresh dict per call
    d = dict(d) if d is not None else {}
    d.update( kw )
    self._ppChain    = d.pop('ppChain', PreProcChain(PrepObj()) )
    self._range      = d.pop('binRange'  , None)
    self._patternIdx = d.pop('pattern'   , 0)
    LoggerStreamable.__init__(self, d)

  def __call__(self, data):
    return self._apply(data)

  @abstractmethod
  def _apply(self, data):
    """
      Overload this method to apply the pre-processing
    """
    return self._ppChain.takeParams(data)

  def isRevertible(self):
    # The applied transformation cannot be reverted
    return False

  def getBin(self):
    return self._range

  def setPatternIndex(self, idx):
    self._patternIdx=idx

  def checkPatternIndex(self,idx):
    return idx == self._patternIdx

  def getPatternIndex(self):
    return self._patternIdx
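Because _apply is abstract, a concrete Subset only needs to provide that one method; the pattern index bookkeeping, bin range and streaming machinery are inherited. A minimal hypothetical subclass, assuming the TuningTools names used above are in scope:

class PassthroughSubset(Subset):

  def _apply(self, data):
    # Fit the pre-processing chain on the data and return the transformed patterns
    self._ppChain.takeParams(data)
    return self._ppChain(data)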
Example #4
class SomCluster( Subset ):

  # The version only needs to be changed if a property is added
  _streamerObj = LoggerRawDictStreamer(toPublicAttrs = {'_code_book','_w'})
  _cnvObj      = RawDictCnv(toProtectedAttrs         = {'_code_book','_w'})

  def __init__(self, d=None, **kw):
    """
      Cluster finder class based on the following parameters:
        code_book: centroids of the clusters given by any algorithm (e.g. kmeans)
        w        : weight factors; these multiply the size of each cluster.
                   E.g. if a cluster was found with 100 events and its w factor is 2,
                   the events in that cluster are duplicated to 200.
        matrix   : projection applied to the centroids.
        p_cluster: cluster target for each neuron of the map
    """
    d = dict(d) if d is not None else {}
    d.update( kw ); del kw
    Subset.__init__(self,d) 

    self._code_book = d.pop('code_book', [])
    self._p_cluster = d.pop('p_cluster', [])
    self._w         = d.pop('w'  , 1   )
    checkForUnusedVars(d, self._warning )  
    del d
    # Some protections before starting
    if type(self._code_book) is list:
      self._code_book = npCurrent.array(self._code_book)
    # If the weight factor is an integer, transform it into an array of factors
    # with the same size as the centroids
    if type(self._w) is int:
      self._w = npCurrent.int_array([self._w for i in range(self._code_book.shape[0])] )
    # Transform to np.array if needed
    if type(self._w) is list:
      self._w = npCurrent.int_array(self._w)
    # When a list of weights is passed, weights and centroids must have the same length.
    if self._w.shape[0] != self._code_book.shape[0]:
      raise ValueError("Weight factor must be an int, list or np.array with the same size as the code book param")
  #__init__ end


  def __call__(self, data):
    return self._apply(data)
  
  def _apply(self,data):
    """
    This function is slower than the C version but works for
    all input types.  If the inputs have the wrong types for the
    C versions of the function, this one is called as a last resort.

    It is about 20 times slower than the C version.
    """
    # Take parameters and apply the pre-processing,
    # keeping the unprocessed data around
    self._ppChain.takeParams(data)
    tdata = self._ppChain(data)

    # n = number of observations
    # d = number of features
    if np.ndim(tdata) == 1:
      if not np.ndim(tdata) == np.ndim(self._code_book):
        raise ValueError("Observation and code_book should have the same rank")
    else:
      (n, d) = tdata.shape
      # code book and observations should have the same rank and number of features
      if not np.ndim(tdata) == np.ndim(self._code_book):
        raise ValueError("Observation and code_book should have the same rank")
      elif not d == self._code_book.shape[1]:
        raise ValueError("Code book(%d) and obs(%d) should have the same "
                         "number of features (eg columns)" %
                         (self._code_book.shape[1], d))

    # Index of the nearest centroid (best matching unit) for each observation
    bmus = tensor_frobenius_argmin(tdata, self._code_book, 10000, self._logger)
    # Fix the MATLAB (1-based) indexing locally, without mutating the stored
    # cluster map on every call
    p_cluster = np.asarray(self._p_cluster) - 1
    code = p_cluster[bmus]
    
    # Release memory
    del tdata
    gc.collect()
    # Join all clusters into a list of clusters
    cpattern=[]
    for target in range(p_cluster.max()+1):
      cpattern.append(data[np.where(code==target)[0],:])
    
    # Resize the clusters
    for i, c in enumerate(cpattern):
      cpattern[i] = np.repeat(c, self._w[i], axis=0)
      self._info('Cluster %d and factor %d with %d events and %d features',
                        i, self._w[i], cpattern[i].shape[0], cpattern[i].shape[1])
    return cpattern
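tensor_frobenius_argmin is an external helper; from its usage above it returns, for each observation, the index of the nearest centroid (the best matching unit). A plain NumPy sketch of that operation, chunked so the broadcasted distance tensor stays bounded in memory (the chunk size mirrors the 10000 passed above; this is an assumption about the helper, not its actual implementation):

import numpy as np

def frobenius_argmin(data, code_book, chunk_size=10000):
  """For each row of data, return the index of the nearest code_book row."""
  bmus = np.empty(data.shape[0], dtype=np.int64)
  for start in range(0, data.shape[0], chunk_size):
    chunk = data[start:start + chunk_size]
    # (c, 1, d) - (m, d) broadcasts to (c, m, d); reduce over the feature axis
    dists = np.linalg.norm(chunk[:, np.newaxis, :] - code_book, axis=-1)
    bmus[start:start + chunk_size] = np.argmin(dists, axis=1)
  return bmus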
Example #5
class GMMCluster( Cluster ):
  # The version only needs to be changed if a property is added
  _streamerObj = LoggerRawDictStreamer(toPublicAttrs = {'_sigma'})
  _cnvObj      = RawDictCnv(toProtectedAttrs         = {'_sigma'})

  def __init__(self,  d=None, **kw):
    """
      Cluster finder class based on the following parameters:
        code_book: centroids of the clusters given by any algorithm (e.g. kmeans)
        w        : weight factors; these multiply the size of each cluster.
                   E.g. if a cluster was found with 100 events and its w factor is 2,
                   the events in that cluster are duplicated to 200.
        matrix   : projection applied to the centroids.
        sigma    : variance parameter of the gaussian; the likelihood value per
                   cluster is computed as
                   lh[i] = np.sum(np.exp(np.power(data - centroid[i], 2) / sigma[i]))
    """
    d = dict(d) if d is not None else {}
    d.update( kw ); del kw
    self._sigma = d.pop('sigma' , npCurrent.array([])   )
    Cluster.__init__(self, d) 
    del d

    # Checking the sigma type
    if type(self._sigma) is list:
      self._sigma = npCurrent.array(self._sigma)
    if not self._sigma.shape == self._code_book.shape:
      raise ValueError("Code book and sigma matrix should have the same shape")
    #__init__ end


  def _apply(self,data):
    """
    This function is slower than the C version but works for
    all input types.  If the inputs have the wrong types for the
    C versions of the function, this one is called as a last resort.

    It is about 20 times slower than the C version.
    """
    # Take parameters and apply the pre-processing,
    # keeping the unprocessed data around
    self._ppChain.takeParams(data)
    tdata = self._ppChain(data)
    # n = number of observations
    # d = number of features
    if np.ndim(tdata) == 1:
      if not np.ndim(tdata) == np.ndim(self._code_book):
        raise ValueError("Observation and code_book should have the same rank")
    else:
      (n, d) = tdata.shape
      # code book and observations should have the same rank and number of features
      if not np.ndim(tdata) == np.ndim(self._code_book):
        raise ValueError("Observation and code_book should have the same rank")
      elif not d == self._code_book.shape[1]:
        raise ValueError("Code book(%d) and obs(%d) should have the same "
                         "number of features (eg columns)" %
                         (self._code_book.shape[1], d))
    # Likelihood computation shapes:
    #   tdata     is n x d
    #   code_book is m x d, where m is the number of clusters
    #   sigma     is m x d
    # Broadcasting (n, 1, d) against (m, d) gives an (n, m, d) tensor, which is
    # reduced over the feature axis before taking the argmax over clusters.
    # See: http://scipy.github.io/old-wiki/pages/EricsBroadcastingDoc
    code = np.argmax(
        np.sum(np.exp(np.power(tdata[:, np.newaxis] - self._code_book, 2) / self._sigma[np.newaxis, :]),
               axis=-1),
        axis=1)

    del tdata
    gc.collect()
    
    # Join all clusters into a list of clusters
    cpattern=[]
    for target in range(self._code_book.shape[0]):
      cpattern.append(data[np.where(code==target)[0],:])
    
    # Resize the clusters
    for i, c in enumerate(cpattern):
      cpattern[i] = np.repeat(c, self._w[i], axis=0)
      self._info('Cluster %d and factor %d with %d events and %d features',
                        i, self._w[i], cpattern[i].shape[0], cpattern[i].shape[1])
    return cpattern
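The argmax line above leans on NumPy broadcasting to avoid an explicit loop over clusters. A toy sketch of the shapes involved (sizes are arbitrary; the sign convention follows the code, where the largest summed value wins):

import numpy as np

n, m, d = 5, 3, 4                         # observations, clusters, features
tdata = np.random.randn(n, d)
code_book = np.random.randn(m, d)
sigma = np.ones((m, d))

diff = tdata[:, np.newaxis] - code_book           # (n, 1, d) - (m, d) -> (n, m, d)
score = np.sum(np.exp(diff**2 / sigma), axis=-1)  # reduce features -> (n, m)
code = np.argmax(score, axis=1)                   # winning cluster per observation
assert code.shape == (n,)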
Example #6
class LinearLHThresholdCorrection( LoggerRawDictStreamer ):
  """
  Applies the likelihood threshold correction method

  -> Version 1: Sets limits, intercept, slope, interceptBkg, slopeBkg
  """
  _streamerObj = LoggerRawDictStreamer( transientAttrs = {'_discr', '_dataCurator'
                                                         , '_baseLabel'
                                                         , 'sgnOut', 'bkgOut'
                                                         , 'sgnHist', 'bkgHist'
                                                         , 'sgnCorrData', 'bkgCorrDataList'
                                                         , '_effSubset', '_effOutput'} )
  _version = 1

  def __init__( self, discriminator = None, dataCurator = None, pileupRef = None
              , limits = None, maxCorr = None, frMargin = None ):
    Logger.__init__( self )
    self._discr            = discriminator
    self._dataCurator      = dataCurator
    self._baseLabel        = ''
    # Decision making parameters: 
    self.pileupRef         = PileupReference.retrieve( pileupRef )
    self.limits            = limits
    self.maxCorr           = maxCorr
    if frMargin is not None: self.frMargin = ( [1.] + frMargin ) if frMargin[0] != 1. else frMargin
    self._pileupLabel      = PileupReference.label( self.pileupRef )
    self._pileupShortLabel = PileupReference.shortlabel( self.pileupRef )
    self.subset            = None
    if self.pileupRef is PileupReference.avgmu:
      #limits = [0,26,60]
      self.limits = [15,37.5,60] if limits is None else limits
    elif self.pileupRef is PileupReference.nvtx:
      self.limits = [0,13,30] if limits is None else limits
    # Other transient attributes:
    self.sgnOut          = None
    self.bkgOut          = None
    self.sgnHist         = None
    self.bkgHist         = None
    self.sgnCorrData     = None
    self.bkgCorrDataList = None
    self._effSubset      = None
    self._effOutput      = None
    self._ol              = 12. if self._discr.removeOutputTansigTF else 1.
    # Decision taking parameters: 
    self.intercept       = None
    self.slope           = None
    self.slopeRange      = None
    # Thresholds:
    self.thres           = None
    self.rawThres        = None
    # Performance:
    self.perf            = None
    self.rawPerf         = None
    # Bin information:
    self.etBinIdx        = self._dataCurator.etBinIdx
    self.etaBinIdx       = self._dataCurator.etaBinIdx
    self.etBin           = self._dataCurator.etBin.tolist()   
    self.etaBin          = self._dataCurator.etaBin.tolist()  

  def saveGraphs( self ):
    from ROOT import TH1F
    sStr = CuratedSubset.tostring( self.subset )
    self.sgnCorrData.save( 'signalCorr_' + self._baseLabel + '_' + sStr)
    for i, bkgCorrData in enumerate(self.bkgCorrDataList):
      bkgCorrData.save( 'backgroundCorr_' + str(i) + '_' + self._baseLabel  + '_' + sStr)
    
    sgnPileup = self.getPileup( CuratedSubset.tosgn( self.subset ) )
    sgnPassPileup = sgnPileup[ self.rawThres.getMask( self.sgnOut ) ]
    sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalUncorr_' + self._baseLabel  + '_' + sStr )
    sgnEff.Write()
    bkgPileup = self.getPileup( CuratedSubset.tobkg( self.subset ) )
    bkgPassPileup = bkgPileup[ self.rawThres.getMask( self.bkgOut ) ]
    bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundUncorr_' + self._baseLabel  + '_' + sStr )
    bkgEff.Write()
    # Write 2D histograms:
    self.sgnHist.Write( 'signal2DCorr_' + self._baseLabel  + '_' + sStr)
    self.bkgHist.Write( 'background2DCorr_' + self._baseLabel  + '_' + sStr)
    # Write output histograms:
    title = CuratedSubset.tostring( CuratedSubset.tosgn( self.subset ) ) + ' NN Output'
    sgnTH1 = TH1F( title, title, 100, -self._ol, self._ol ) 
    for p in self.sgnOut: sgnTH1.Fill( p )
    sgnTH1.Write( 'signalOutputs_' + self._baseLabel + '_' + sStr )
    title = CuratedSubset.tostring( CuratedSubset.tobkg( self.subset ) ) + ' NN Output'
    bkgTH1 = TH1F( title, title, 100, -self._ol, self._ol ) 
    for p in self.bkgOut: bkgTH1.Fill( p )
    bkgTH1.Write( 'backgroundOutputs_' + self._baseLabel + '_' + sStr )
    if self._effSubset is not None:
      esubset = CuratedSubset.tobinary( CuratedSubset.topattern( self._effSubset[0] ) ) 
      if esubset is not self.subset:
        sStr = CuratedSubset.tostring( esubset )
        # Here we have also to plot the subset where we have calculated the efficiency
        sgnPileup = self.getPileup( CuratedSubset.tosgn( self._effSubset[0] ) )
        sgnPassPileup = sgnPileup[ self.rawThres.getMask( self._effOutput[0] ) ]
        sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalUncorr_' + self._baseLabel  + '_' + sStr )
        sgnEff.Write()
        sgnPassPileup = sgnPileup[ self.thres.getMask( self._effOutput[0], sgnPileup ) ]
        sgnEff = PileupEffHist( sgnPassPileup, sgnPileup, defaultNvtxBins, 'signalCorr_' + self._baseLabel  + '_' + sStr )
        sgnEff.Write()
        bkgPileup = self.getPileup( CuratedSubset.tobkg( self._effSubset[1] ) )
        bkgPassPileup = bkgPileup[ self.rawThres.getMask( self._effOutput[1] ) ]
        bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundUncorr_' + self._baseLabel  + '_' + sStr )
        bkgEff.Write()
        bkgPassPileup = bkgPileup[ self.thres.getMask( self._effOutput[1], bkgPileup ) ]
        bkgEff = PileupEffHist( bkgPassPileup, bkgPileup, defaultNvtxBins, 'backgroundCorr_' + self._baseLabel  + '_' + sStr )
        bkgEff.Write()
        sgnHist = self.get2DPerfHist( self._effSubset[0], 'signal2DCorr_' + self._baseLabel + '_' + sStr,     outputs = self._effOutput[0] )
        sgnHist.Write('signal2DCorr_' + self._baseLabel + '_' + sStr)
        bkgHist = self.get2DPerfHist( self._effSubset[1], 'background2DCorr_' + self._baseLabel + '_' + sStr, outputs = self._effOutput[1] )
        bkgHist.Write('background2DCorr_' + self._baseLabel + '_' + sStr)
        # 1D output plots
        title = CuratedSubset.tostring( self._effSubset[0] ) + ' NN Output'
        sgnTH1 = TH1F( title, title, 100, -self._ol, self._ol )
        # Fill from the efficiency-subset outputs (not the base-subset ones)
        for p in self._effOutput[0]: sgnTH1.Fill( p )
        sgnTH1.Write( 'signalOutputs_' + self._baseLabel + '_' + sStr )
        title = CuratedSubset.tostring( self._effSubset[1] ) + ' NN Output'
        bkgTH1 = TH1F( title, title, 100, -self._ol, self._ol )
        for p in self._effOutput[1]: bkgTH1.Fill( p )
        bkgTH1.Write( 'backgroundOutputs_' + self._baseLabel + '_' + sStr )

  def __call__( self, referenceObj, subset, *args, **kw ):
    " Hook method to discriminantLinearCorrection "
    self.discriminantLinearCorrection( referenceObj, subset, *args, **kw )

  def releasetData( self ):
    self.sgnOut          = None
    self.bkgOut          = None
    self._effOutput      = None
    self.sgnHist         = None
    self.bkgHist         = None
    self.sgnCorrData     = None
    self.bkgCorrDataList = None

  def _getCorrectionData( self, referenceObj, **kw ):
    neuron = kw.get('neuron', None )
    sort   = kw.get('sort', None )
    init   = kw.get('init', None )
    self._baseLabel = "ref%s_etBin%d_etaBin%d%s%s%s" % ( ReferenceBenchmark.tostring( referenceObj.reference )
        , self._dataCurator.etBinIdx
        , self._dataCurator.etaBinIdx
        , ( '_neuron%d' % neuron ) if neuron is not None else ''
        , ( '_sort%d' % sort ) if sort is not None else ''
        , ( '_init%d' % init ) if init is not None else '' )
    self.sgnHist, self.sgnOut = self.get2DPerfHist( CuratedSubset.tosgn(self.subset), 'signal_' + self._baseLabel,     getOutputs = True )
    self.bkgHist, self.bkgOut = self.get2DPerfHist( CuratedSubset.tobkg(self.subset), 'background_' + self._baseLabel, getOutputs = True )
    if not self.rawPerf:
      try:
        from libTuningToolsLib import genRoc
      except ImportError:
        from libTuningTools import genRoc
      # Get raw threshold:
      if referenceObj.reference is ReferenceBenchmark.Pd:
        raw_thres = RawThreshold( - np.percentile( -self.sgnOut, referenceObj.refVal * 100. ) )
      elif referenceObj.reference is ReferenceBenchmark.Pf:
        raw_thres = RawThreshold( - np.percentile( -self.bkgOut, referenceObj.refVal * 100. ) )
      else:
        o = genRoc(self.sgnOut, self.bkgOut, self._ol, -self._ol, 0.001 )
        roc = Roc( o
                 , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                 , etBin = self.etBin, etaBin = self.etaBin )
        self.rawPerf = roc.retrieve( referenceObj )
      if referenceObj.reference in ( ReferenceBenchmark.Pd, ReferenceBenchmark.Pf ):
        self.rawPerf = self.getEffPoint( referenceObj.name
                                       , thres = raw_thres
                                       , makeCorr = False )
        # Here we protect against choosing suboptimal Pd/Pf points:
        o = genRoc(self.sgnOut, self.bkgOut, self._ol, -self._ol, 0.001 )
        roc = Roc( o
                 , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                 , etBin = self.etBin, etaBin = self.etaBin )
        # Check whether we could be performing better:
        if referenceObj.reference is ReferenceBenchmark.Pd:
          mask = roc.pds >= referenceObj.refVal
        elif referenceObj.reference is ReferenceBenchmark.Pf:
          mask = roc.pfs <= referenceObj.refVal
        pds = roc.pds[mask]
        pfs = roc.pfs[mask]
        sps = roc.sps[mask]
        if referenceObj.reference is ReferenceBenchmark.Pd:
          mask = pfs <= ( 1.001 * self.rawPerf.pf )
          sps = sps[mask]
          pfs = pfs[mask]
          if len(sps):
            idx = np.argmax(sps)
            if pfs[idx] < 0.98 * self.rawPerf.pf:
              self._warning('Model is sub-optimal when performing at requested Pd.')
              self._info('Using highest SP operation point with virtually same Pf.')
              raw_thres = RawThreshold( - np.percentile( -self.bkgOut, pfs[idx] * 100. ) )
              self._debug('Previous performance was: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
              self.rawPerf = self.getEffPoint( referenceObj.name
                                             , thres = raw_thres
                                             , makeCorr = False )
              self._debug('New performance is: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
        elif referenceObj.reference is ReferenceBenchmark.Pf:
          mask = pds >= ( 0.999 * self.rawPerf.pd )
          sps = sps[mask]
          pds = pds[mask]
          if len(sps):
            idx = np.argmax(sps)
            if pds[idx] > 1.005 * self.rawPerf.pd:
              self._warning('Model is sub-optimal when performing at requested Pf.')
              self._info('Using highest SP operation point with virtually same Pd.')
              raw_thres = RawThreshold( - np.percentile( -self.sgnOut, pds[idx] * 100. ) )
              self._debug('Previous performance was: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
              self.rawPerf = self.getEffPoint( referenceObj.name
                                             , thres = raw_thres
                                             , makeCorr = False )
              self._debug('New performance is: %s', self.rawPerf.asstr( addname = False, perc = True, addthres = False ))
    else:
      self._debug('Skipped calculating raw performance since we already have it calculated')
    self.rawThres = self.rawPerf.thres
    # Use the standard LH method on the signal data:
    sgnCorrData = LHThresholdCorrectionData( self.sgnHist, self.rawPerf.pd, self.rawThres.thres, self.limits, 1. )
    return sgnCorrData

  def getPileup(self, subset): 
    baseinfo = self._dataCurator.getBaseInfo(subset, BaseInfo.PileUp)
    if CuratedSubset.isbinary( subset ):
      if CuratedSubset.isoperation( subset ):
        ret = [np.concatenate(b, axis = npCurrent.odim).squeeze().astype(dtype='float64') for b in baseinfo]
      else:
        ret = [b.squeeze().astype(dtype='float64') for b in baseinfo]
    else:
      if CuratedSubset.isoperation( subset ):
        ret = np.concatenate(baseinfo, axis = npCurrent.odim).squeeze().astype(dtype='float64')
      else:
        ret = baseinfo.squeeze().astype(dtype='float64')
    if (ret == 0.).all():
      self._fatal("All pile-up data is zero!")
    return ret

  def getOutput(self, subset): 
    self._verbose('Propagating output for subset: %s', CuratedSubset.tostring(subset))
    data = self._dataCurator[CuratedSubset.topattern( subset )]
    inputDim = self._dataCurator.nInputs
    if inputDim != self._discr.getNumNodes(0):
      self._fatal( "Data number of patterns (%d) do not match with discriminator input dimension (%d)!"
                 ,  inputDim, self._discr.getNumNodes(0))
    if CuratedSubset.isbinary( subset ):
      if CuratedSubset.isoperation( subset ):
        output = [np.concatenate([self._discr.propagate_np(sd) for sd in d], axis  = npCurrent.odim) for d in data]
      else:
        output = [self._discr.propagate_np(d) for d in data]
    else:
      if CuratedSubset.isoperation( subset ):
        output = np.concatenate([self._discr.propagate_np(d) for d in data], axis = npCurrent.odim)
      else:
        output = self._discr.propagate_np(data)
    return output

  def _calcEff( self, subset, output = None, pileup = None, thres = None, makeCorr = True ):
    self._verbose('Calculating efficiency for %s', CuratedSubset.tostring( subset ) )
    pileup = self.getPileup(subset) if pileup is None else pileup
    if output is None: output = self.getOutput(subset)
    if thres is None: thres = self.thres if makeCorr else self.rawThres
    args = (output, pileup) if makeCorr else (output,)
    return thres.getPerf( *args )

  def getEffPoint( self, name, subset = [None, None], outputs = [None, None], pileup = [None,None], thres = None, makeCorr = True ):
    from TuningTools.Neural import PerformancePoint
    auc = self.rawPerf.auc if self.rawPerf else -1
    if not isinstance(subset, (tuple,list)): 
      if subset is None: 
        if not(any([o is None for o in outputs])): self._fatal("Subset must be specified when outputs is used.")
        subset = self.subset
      subset = [CuratedSubset.tosgn(subset),CuratedSubset.tobkg(subset)]
    else:
      if len(subset) == 1:
        if subset[0] is None: 
          if not(any([o is None for o in outputs])): self._fatal("Subset must be specified when outputs is used.")
          subset = [self.subset]
        subset = [CuratedSubset.tosgn(subset[0]),CuratedSubset.tobkg(subset[0])]
      else:
        if any([s is None for s in subset]): 
          if not(any([o is None for o in outputs])): self._fatal("Subset must be specified when outputs is used.")
          subset = [self.subset, self.subset]
        subset = [CuratedSubset.tosgn(subset[0]),CuratedSubset.tobkg(subset[1])]
    self._effSubset = subset
    if any([o is None for o in outputs]):
      #if outputs[0] is None:
      if isinstance(subset, (list,tuple)):
        # FIXME This assumes that sgnOut is cached:
        outputs = [(self.getOutput(CuratedSubset.topattern(s)) if CuratedSubset.tobinary(s) is not self.subset else o )
                   for o, s in zip([self.sgnOut,self.bkgOut], subset)]
      else:
        if CuratedSubset.tobinary(subset) is not self.subset:
          outputs = self.getOutput(CuratedSubset.topattern(subset))
        else:
          outputs = [self.sgnOut,self.bkgOut]
      # NOTE: This can be commented out to improve speed
      try:
        from libTuningToolsLib import genRoc
      except ImportError:
        from libTuningTools import genRoc
      o = genRoc(outputs[0], outputs[1], +self._ol, -self._ol, 0.001 )
      auc = Roc( o ).auc
    self._effOutput = outputs
    if thres is None: thres = self.thres if makeCorr else self.rawThres
    pd = self._calcEff( subset[0], output = outputs[0], pileup = pileup[0], thres = thres, makeCorr = makeCorr )
    pf = self._calcEff( subset[1], output = outputs[1], pileup = pileup[1], thres = thres, makeCorr = makeCorr )
    sp = calcSP(pd, 1. - pf)
    return PerformancePoint( name, sp, pd, pf, thres, perc = False, auc = auc
                           , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                           , etBin = self.etBin, etaBin = self.etaBin )

  def getReach( self, sgnCorrData, bkgCorrDataList ):
    # Pile-up value where the signal and background correction lines cross
    # (999. when the lines are parallel)
    return [ ( ( bkgCorrData.intercept - sgnCorrData.intercept ) / ( sgnCorrData.slope - bkgCorrData.slope ) )
             if ( sgnCorrData.slope - bkgCorrData.slope ) else 999.
             for bkgCorrData in bkgCorrDataList ]

  def discriminantLinearCorrection( self, referenceObj, subset, **kw ):
    from ROOT import TH2F
    subset = CuratedSubset.tobinary( CuratedSubset.topattern( subset ) )
    if self.subset is subset:
      self._debug("Already retrieved parameters for subset %s.", CuratedSubset.tostring( subset ) )
      return
    self._info('Running linear correction...')
    # Reset raw perf:
    self.rawPerf = kw.pop('rawPerf', None)
    self.subset = subset
    self._verbose('Getting correction data')
    self.sgnCorrData  = self._getCorrectionData( referenceObj, **kw )
    self._verbose('Getting background correction data')
    self.bkgCorrDataList = [LHThresholdCorrectionData( self.bkgHist, self.rawPerf.pf * mult, self.rawThres.thres, self.limits, 1. ) 
                            for mult in self.frMargin]
    # Set final parameters:
    self._verbose('Getting linear correction threshold')
    self.thres = PileupLinearCorrectionThreshold( intercept = self.sgnCorrData.intercept, slope = self.sgnCorrData.slope
                                                , rawThres = self.rawThres.thres
                                                , reach = self.getReach( self.sgnCorrData, self.bkgCorrDataList )
                                                , margins = self.frMargin, limits = self.limits, maxCorr = self.maxCorr
                                                , pileupStr = self._pileupShortLabel
                                                , etBinIdx = self.etBinIdx, etaBinIdx = self.etaBinIdx
                                                , etBin = self.etBin, etaBin = self.etaBin )
    self._verbose('Getting performance')
    self.perf = self.getEffPoint( referenceObj.name, makeCorr = True )
    # Set performance results for monitoring purposes:
    self._debug("Retrieved following parameters and performance values using %s dataset", Dataset.tostring(Dataset.Train)  )
    self._debug("Raw: %s", self.rawPerf.asstr( perc = True, addthres = False ) )
    self._debug("Raw Threshold: %s", self.rawPerf.thresstr() )
    self._debug("<pile-up> limits: %r.", self.limits )
    self._debug("Linear correction: %s", self.perf.asstr( perc = True, addthres = False ) ) 
    self._debug("Linear correction Threshold: %s", self.perf.thresstr() ) 
    self._debug("Reach: %r", tuple(zip( self.frMargin[1:], self.thres.reach[1:])))

  def makePlots(self):
    # TODO Use this method when saving data to monitoring
    # NOTE: sgnEff, bkgEff, eff, sgnList, bkgList and point below are still
    # unbound placeholders, to be wired in when this method is hooked up.
    ## Plot information
    import ROOT
    ROOT.gROOT.SetBatch(ROOT.kTRUE)
    ROOT.gErrorIgnoreLevel=ROOT.kWarning
    ROOT.TH1.AddDirectory(ROOT.kFALSE)
    from TuningTools.monitoring.plots.PlotFunctions import SetupStyle
    mystyle = SetupStyle()
    #mystyle.SetTitleX(0.5)
    #mystyle.SetTitleAlign(23)
    #mystyle.SetPadBottomMargin(0.13)
    mystyle.SetOptStat(0)
    mystyle.SetOptTitle(0)
    c = PlotLinearEffCorr( sgnEff, self.sgnCorrData.histEff, 'signalEffComp_' + self._baseLabel
                         , xname = self._pileupLabel, limits = self.limits, refValue = eff.pd_value
                         , eff_uncorr = self.rawPerf, eff = self.perf
                         , etBin = None, etaBin = None )
    c.SaveAs( c.GetName() + '.pdf' )
    c = PlotLinearEffCorr( bkgEff, self.bkgCorrDataList[0].histEff, 'backgroundEffComp_' + self._baseLabel
                         , xname = self._pileupLabel, limits = self.limits, refValue = eff.pf_value
                         , eff_uncorr = self.rawPerf, eff = self.perf
                         , etBin = None, etaBin = None )
    c.SaveAs( c.GetName() + '.pdf' )
    # TODO Add background f1's
    c = Plot2DLinearFit( sgnList[0], title = 'signal2DHist_' + self._baseLabel
                       , xname = self._pileupLabel
                       , limits = self.limits, graph = self.sgnCorrData.graph
                       , label = 'Signal Train DS', eff_uncorr = self.rawPerf, eff = self.perf
                       , etBin = None, etaBin = None )
    c.SaveAs( c.GetName() + '.pdf' )
    c = Plot2DLinearFit( bkgList[0], title = 'background2DHist_' + self._baseLabel
                       , xname = self._pileupLabel
                       , limits = self.limits, thres = point.thres_value, graph = self.bkgCorrDataList[0].graph
                       , label = 'Background Train DS', eff_uncorr = self.rawPerf, eff = self.perf
                       , etBin = None, etaBin = None )
    c.SaveAs( c.GetName() + '.pdf' )

  def get2DPerfHist( self, subset, histLabel, outputs = None, getOutputs = False ):
    if outputs is None: outputs = self.getOutput(subset)
    from ROOT import TH2F#, MakeNullPointer
    # These limits are hardcoded for now, we might want to test them:
    hist = TH2F( histLabel, histLabel, 500, -self._ol, self._ol, len(defaultNvtxBins)-1, defaultNvtxBins )
    hist.Sumw2()
    hist.FillN( len(outputs) - 1, outputs.astype(dtype='float64')
              , self.getPileup(subset)
              , npCurrent.ones( outputs.shape, dtype='float64' ) # MakeNullPointer()
              , 1 )
    ret = hist
    if getOutputs:
      ret = (hist, outputs)
    return ret
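The raw thresholds in _getCorrectionData are negated percentiles of the negated network outputs, so that a fraction refVal of the signal (for a Pd reference) lands above the cut. A standalone sketch of that relation (the Gaussian outputs and ref_val here are made up for illustration):

import numpy as np

rng = np.random.default_rng(0)
sgn_out = rng.normal(loc=1.0, scale=0.5, size=10000)  # hypothetical signal NN outputs
ref_val = 0.95                                        # requested detection probability

# np.percentile(-x, q) is the value below which q% of -x falls; negating it back
# gives a threshold such that ~ref_val of the signal outputs pass (lie above it).
thres = -np.percentile(-sgn_out, ref_val * 100.)
assert abs(np.mean(sgn_out >= thres) - ref_val) < 0.01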