Example #1
0
 def get_best_kappa4( self, NE, yPredR ):
   """Grid-search four increasing thresholds that maximise kappa for scores 0 to 4.

   Candidate thresholds are integer multiples of 1 / NE. The first two stay
   below 2, the third below 3 and the fourth below 4, and each threshold is
   strictly greater than the previous one. A raw prediction is mapped to the
   number of thresholds it reaches (0 to 4) and the resulting labels are
   scored against self.y with skllMetrics.kappa.

   Parameters:
     NE     -- number of grid steps per unit of the raw prediction scale.
     yPredR -- raw predictions; must support elementwise comparison with a
               float (assumed to be a numpy array -- TODO confirm with callers).

   Returns:
     ( bestKappa, [ threshold1, threshold2, threshold3, threshold4 ] ).
     Every value is 0 when no candidate reaches a kappa above 0.
   """
   bestKappa = 0
   bestThresholds = [ 0, 0, 0, 0 ]
   step = NE * 1.0
   # The range of the raw predictions never changes, so compute it once
   # instead of once per innermost iteration.
   lowest, highest = np.min( yPredR ), np.max( yPredR )
   for thresholdV1 in range( 1, 2 * NE ):
     threshold1 = thresholdV1 / step
     # Skip first thresholds that lie below every prediction; the test only
     # depends on the outer loop, so the whole subtree is pruned here.
     if threshold1 < lowest:
       continue
     reached1 = ( yPredR >= threshold1 ) * 1
     for thresholdV2 in range( thresholdV1 + 1, 2 * NE ):
       threshold2 = thresholdV2 / step
       # threshold2 is always the smallest of the last three thresholds, so
       # this equals the test min( [ threshold2, threshold3, threshold4 ] ).
       # NOTE(review): only threshold2 is compared with the maximum, so
       # threshold3 / threshold4 may exceed every prediction -- confirm intended.
       if threshold2 > highest:
         continue
       reached2 = ( yPredR >= threshold2 ) * 1 + reached1
       for thresholdV3 in range( thresholdV2 + 1, 3 * NE ):
         threshold3 = thresholdV3 / step
         reached3 = ( yPredR >= threshold3 ) * 1 + reached2
         for thresholdV4 in range( thresholdV3 + 1, 4 * NE ):
           threshold4 = thresholdV4 / step
           yPred = ( yPredR >= threshold4 ) * 1 + reached3
           cKappa = skllMetrics.kappa( self.y, yPred )
           # Strict comparison keeps the first candidate found on ties.
           if cKappa > bestKappa:
             bestKappa = cKappa
             bestThresholds = [ threshold1, threshold2, threshold3, threshold4 ]
   return bestKappa, bestThresholds
Example #2
0
 def get_best_kappa4(self, NE, yPredR):
     """Return the best kappa and the four thresholds that produce it (scores 0 to 4).

     Thresholds are tried on a grid of multiples of 1 / NE: threshold1 and
     threshold2 below 2, threshold3 below 3, threshold4 below 4, always in
     strictly increasing order. Each raw prediction is converted to the count
     of thresholds it is greater than or equal to, and the labels are scored
     against self.y with skllMetrics.kappa.

     Parameters:
         NE: number of grid steps per unit of the raw prediction scale.
         yPredR: raw predictions; must support elementwise comparison with a
             float (assumed to be a numpy array -- TODO confirm with callers).

     Returns:
         (bestKappa, [threshold1, threshold2, threshold3, threshold4]); all
         zeros when no candidate yields a kappa above 0.
     """
     bestKappa = 0
     bestCuts = [0, 0, 0, 0]
     gridStep = NE * 1.0
     # Loop-invariant bounds of the raw predictions, computed a single time.
     predMin = np.min(yPredR)
     predMax = np.max(yPredR)
     for thresholdV1 in range(1, 2 * NE):
         threshold1 = thresholdV1 / gridStep
         # Depends only on the outer loop: prune before descending.
         if threshold1 < predMin:
             continue
         labels1 = (yPredR >= threshold1) * 1
         for thresholdV2 in range(thresholdV1 + 1, 2 * NE):
             threshold2 = thresholdV2 / gridStep
             # threshold2 <= threshold3 <= threshold4, so comparing threshold2
             # is the same as comparing the minimum of the last three.
             # NOTE(review): the upper thresholds are never checked against
             # the maximum prediction -- confirm that this is intended.
             if threshold2 > predMax:
                 continue
             labels2 = (yPredR >= threshold2) * 1 + labels1
             for thresholdV3 in range(thresholdV2 + 1, 3 * NE):
                 threshold3 = thresholdV3 / gridStep
                 labels3 = (yPredR >= threshold3) * 1 + labels2
                 for thresholdV4 in range(thresholdV3 + 1, 4 * NE):
                     threshold4 = thresholdV4 / gridStep
                     yPred = (yPredR >= threshold4) * 1 + labels3
                     cKappa = skllMetrics.kappa(self.y, yPred)
                     # Strict '>' keeps the earliest candidate on ties.
                     if cKappa > bestKappa:
                         bestKappa = cKappa
                         bestCuts = [threshold1, threshold2, threshold3, threshold4]
     return bestKappa, bestCuts
Example #3
0
 def evaluateCl1(self, model, SVM=False):
     """Pick the probability cut-off with the highest kappa for 0/1 scores.

     The probabilities come from self.cv_estimateCl1(model, SVM). Cut-offs
     0.01, 0.02, ..., 0.99 are tried in increasing order; an observation is
     labelled positive when its probability is strictly above the cut-off,
     and the labels are scored against self.y with skllMetrics.kappa.

     Returns:
         (bestKappa, model.param1, model.param2, bestThreshold). If no
         cut-off yields a kappa above 0 the result is kappa 0 with the
         default threshold 0.5.
     """
     probabilities = self.cv_estimateCl1(model, SVM)
     topKappa = 0
     topCut = 0.5
     for cut in [percent / 100.0 for percent in range(1, 100)]:
         score = skllMetrics.kappa(self.y, probabilities > cut)
         # Strict comparison: ties keep the lower, earlier cut-off.
         if score > topKappa:
             topKappa, topCut = score, cut
     return topKappa, model.param1, model.param2, topCut
Example #4
0
 def evaluateCl1( self, model, SVM = False ):
   """Find the cut-off on the cross-validated probabilities that maximises kappa.

   Used when the score is 0 or 1. Probabilities are obtained from
   self.cv_estimateCl1( model, SVM ); every cut-off from 0.01 to 0.99 in
   steps of 0.01 is applied with a strict "greater than" and the resulting
   labels are compared with self.y through skllMetrics.kappa.

   Returns:
     ( bestKappa, model.param1, model.param2, bestThreshold ); kappa 0 and
     threshold 0.5 when no cut-off scores above 0.
   """
   scores = self.cv_estimateCl1( model, SVM )
   winner = ( 0, 0.5 )
   for step in range( 1, 100 ):
     cutoff = step / 100.0
     kappaHere = skllMetrics.kappa( self.y, scores > cutoff )
     # Only a strictly better kappa replaces the current winner.
     if kappaHere > winner[0]:
       winner = ( kappaHere, cutoff )
   return winner[0], model.param1, model.param2, winner[1]
Example #5
0
  def evaluateGB1(self, model, nTree, fixed = False):
    """Select the tree index and cut-off with the highest kappa for 0/1 scores.

    self.cv_estimateGB1( model, nTree ) returns an object that is indexed by
    ( candidate - 1 ) to get the probabilities for that candidate (presumably
    one entry per number of boosting trees -- TODO confirm). Each entry is
    cut at 0.01, 0.02, ..., 0.19 and scored against self.y with
    skllMetrics.kappa.

    Parameters:
      model -- model object; only model.param2 is read here.
      nTree -- passed to cv_estimateGB1; when fixed is true it is also the
               only tree candidate considered (as nTree - 1).
      fixed -- when false, a hard-coded list of candidates from 20 to 1500
               is scanned instead.

    Returns:
      ( bestKappa, bestTreeIndex, model.param2, bestThreshold ); the first,
      second and last values are 0 when no combination scores above 0.
    """
    yProbL = self.cv_estimateGB1( model, nTree )
    bestKappa, bthreshold, bTree = 0, 0, 0
    if fixed:
      # NOTE(review): the index used below becomes nTree - 2 in this mode;
      # confirm whether nTree - 1 (the last tree) was intended.
      treeCandidates = [ nTree - 1 ]
    else:
      # NOTE(review): these candidates index yProbL up to 1499 regardless of
      # nTree -- confirm that cv_estimateGB1 always returns enough entries.
      treeCandidates = [ 20, 30, 50, 100, 150, 200, 250, 300, 350, 400, 450,
                         500, 550, 600, 750, 1000, 1250, 1500 ]
    # The threshold grid is the same for every candidate, so build it once.
    # NOTE(review): only 0.01 to 0.19 is scanned -- confirm that this narrow
    # range is deliberate.
    thresholds = [ div / 100.0 for div in range( 1, 20 ) ]
    for j in treeCandidates:
      cTree = j - 1
      yProb = yProbL[cTree]
      for threshold in thresholds:
        ckappa = skllMetrics.kappa( self.y, yProb > threshold )
        # Strict comparison keeps the first best combination on ties.
        if ckappa > bestKappa:
          bestKappa, bthreshold, bTree = ckappa, threshold, cTree
    return bestKappa, bTree, model.param2, bthreshold
Example #6
0
 def get_best_kappa2(self, NE, yPredR):
     """Grid-search two increasing thresholds that maximise kappa for scores 0 to 2.

     Both thresholds are multiples of 1 / NE below 2, with threshold2 strictly
     greater than threshold1. Pairs whose first threshold is below the
     smallest prediction, or whose second threshold is above the largest
     prediction, are skipped. A raw prediction is mapped to the number of
     thresholds it reaches (0 to 2) and the labels are scored against self.y
     with skllMetrics.kappa.

     Parameters:
         NE: number of grid steps per unit of the raw prediction scale.
         yPredR: raw predictions; must support elementwise comparison with a
             float (assumed to be a numpy array -- TODO confirm with callers).

     Returns:
         (bestKappa, threshold1, threshold2); all zeros when no pair yields a
         kappa above 0.
     """
     bestKappa, bthreshold1, bthreshold2 = 0, 0, 0
     gridStep = NE * 1.0
     # Bounds of the predictions are loop invariant: compute them once.
     predMin = np.min(yPredR)
     predMax = np.max(yPredR)
     for thresholdV1 in range(1, 2 * NE):
         threshold1 = thresholdV1 / gridStep
         # Depends only on the outer loop, so prune before the inner loop.
         if threshold1 < predMin:
             continue
         labels1 = (yPredR >= threshold1) * 1
         for thresholdV2 in range(thresholdV1 + 1, 2 * NE):
             threshold2 = thresholdV2 / gridStep
             if threshold2 > predMax:
                 continue
             yPred = (yPredR >= threshold2) * 1 + labels1
             cKappa = skllMetrics.kappa(self.y, yPred)
             # Strict '>' keeps the earliest pair on ties.
             if cKappa > bestKappa:
                 bestKappa, bthreshold1, bthreshold2 = cKappa, threshold1, threshold2
     return bestKappa, bthreshold1, bthreshold2
Example #7
0
 def get_best_kappa2( self, NE, yPredR ):
   """Return the best kappa and the two thresholds that produce it (scores 0 to 2).

   Thresholds are tried on a grid of multiples of 1 / NE below 2, with the
   second strictly above the first. A pair is skipped when the first
   threshold is below the smallest prediction or the second is above the
   largest one. Each raw prediction becomes the count of thresholds it is
   greater than or equal to, and the labels are scored against self.y with
   skllMetrics.kappa.

   Parameters:
     NE     -- number of grid steps per unit of the raw prediction scale.
     yPredR -- raw predictions; must support elementwise comparison with a
               float (assumed to be a numpy array -- TODO confirm with callers).

   Returns:
     ( bestKappa, threshold1, threshold2 ); all zeros when no pair reaches a
     kappa above 0.
   """
   bestKappa, bthreshold1, bthreshold2 = 0, 0, 0
   step = NE * 1.0
   # Computed once: the prediction range does not change inside the loops.
   lowest, highest = np.min( yPredR ), np.max( yPredR )
   for thresholdV1 in range( 1, 2 * NE ):
     threshold1 = thresholdV1 / step
     # This test only involves the outer loop, so skip the inner loop entirely.
     if threshold1 < lowest:
       continue
     reached1 = ( yPredR >= threshold1 ) * 1
     for thresholdV2 in range( thresholdV1 + 1, 2 * NE ):
       threshold2 = thresholdV2 / step
       if threshold2 > highest:
         continue
       yPred = ( yPredR >= threshold2 ) * 1 + reached1
       cKappa = skllMetrics.kappa( self.y, yPred )
       # Strict comparison keeps the first pair found on ties.
       if cKappa > bestKappa:
         bestKappa, bthreshold1, bthreshold2 = cKappa, threshold1, threshold2
   return bestKappa, bthreshold1, bthreshold2
Example #8
0
 def evaluateCl2(self, model, SVM=False):
     """Pick the pair of probability cut-offs with the highest kappa for scores 0 to 2.

     With SVM set, the search is delegated to self.evaluateSV2(model) and its
     three results are returned together with the model parameters.
     Otherwise self.cv_estimateCl2(model, SVM) supplies two probability
     arrays; each is cut at 0.05, 0.10, ..., 0.95 (all combinations). The
     first array contributes label 1 and the second label 2 when strictly
     above its cut-off, the larger label wins, and the result is scored
     against self.y with skllMetrics.kappa.

     Returns:
         (bestKappa, model.param1, model.param2, threshold1, threshold2). In
         the non-SVM path the defaults are kappa 0 and thresholds 0.5 / 0.5
         when no combination scores above 0.
     """
     if SVM:
         svKappa, svCut1, svCut2 = self.evaluateSV2(model)
         return svKappa, model.param1, model.param2, svCut1, svCut2

     gridSize = 20
     probs1, probs2 = self.cv_estimateCl2(model, SVM)
     best = (0, 0.5, 0.5)
     cuts = [k / (gridSize * 1.0) for k in range(1, gridSize)]
     for cut1 in cuts:
         for cut2 in cuts:
             labels = np.maximum((probs1 > cut1) * 1, (probs2 > cut2) * 2)
             score = skllMetrics.kappa(self.y, labels)
             # Strict comparison: the earliest best combination is kept.
             if score > best[0]:
                 best = (score, cut1, cut2)
     return best[0], model.param1, model.param2, best[1], best[2]
Example #9
0
 def evaluateCl2( self, model, SVM = False ):
   """Find the two cut-offs that maximise kappa when the score ranges from 0 to 2.

   When SVM is true the work is handed to self.evaluateSV2( model ).
   Otherwise the two probability arrays from self.cv_estimateCl2( model, SVM )
   are each cut on the grid 1/20, 2/20, ..., 19/20. An observation gets
   label 1 if the first probability is strictly above its cut-off, label 2
   if the second one is, and the larger of the two labels is used. Labels
   are scored against self.y with skllMetrics.kappa.

   Returns:
     ( bestKappa, model.param1, model.param2, threshold1, threshold2 ). In
     the non-SVM path the result is kappa 0 with thresholds 0.5 and 0.5 when
     no combination scores above 0.
   """
   if SVM:
     kappaSV, firstSV, secondSV = self.evaluateSV2( model )
     return kappaSV, model.param1, model.param2, firstSV, secondSV

   steps = 20
   probLabel1, probLabel2 = self.cv_estimateCl2( model, SVM )
   topKappa = 0
   topFirst = 0.5
   topSecond = 0.5
   for i in range( 1, steps ):
     first = i / ( steps * 1.0 )
     for j in range( 1, steps ):
       second = j / ( steps * 1.0 )
       asOne = ( probLabel1 > first ) * 1
       asTwo = ( probLabel2 > second ) * 2
       kappaHere = skllMetrics.kappa( self.y, np.maximum( asOne, asTwo ) )
       # Replace the winner only on a strictly higher kappa.
       if kappaHere > topKappa:
         topKappa = kappaHere
         topFirst = first
         topSecond = second
   return topKappa, model.param1, model.param2, topFirst, topSecond
Example #10
0
    def evaluateGB1(self, model, nTree, fixed=False):
        """Select the tree index and cut-off with the highest kappa for 0/1 scores.

        self.cv_estimateGB1(model, nTree) returns an object that is indexed
        by (candidate - 1) to get the probabilities for that candidate
        (presumably one entry per number of boosting trees -- TODO confirm).
        Each entry is cut at 0.01, 0.02, ..., 0.19 and scored against self.y
        with skllMetrics.kappa.

        Parameters:
            model: model object; only model.param2 is read here.
            nTree: passed to cv_estimateGB1; when fixed is true it is also
                the only tree candidate considered (as nTree - 1).
            fixed: when false, a hard-coded list of candidates from 20 to
                1500 is scanned instead.

        Returns:
            (bestKappa, bestTreeIndex, model.param2, bestThreshold); the
            first, second and last values are 0 when no combination scores
            above 0.
        """
        yProbL = self.cv_estimateGB1(model, nTree)
        bestKappa, bthreshold, bTree = 0, 0, 0
        if fixed:
            # NOTE(review): the index used below becomes nTree - 2 in this
            # mode; confirm whether nTree - 1 (the last tree) was intended.
            treeCandidates = [nTree - 1]
        else:
            # NOTE(review): these candidates index yProbL up to 1499
            # regardless of nTree -- confirm cv_estimateGB1 returns enough
            # entries.
            treeCandidates = [
                20, 30, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550,
                600, 750, 1000, 1250, 1500
            ]
        # The threshold grid is identical for every candidate: build it once.
        # NOTE(review): only 0.01 to 0.19 is scanned -- confirm that this
        # narrow range is deliberate.
        thresholds = [div / 100.0 for div in range(1, 20)]
        for j in treeCandidates:
            cTree = j - 1
            yProb = yProbL[cTree]
            for threshold in thresholds:
                ckappa = skllMetrics.kappa(self.y, yProb > threshold)
                # Strict comparison keeps the first best combination on ties.
                if ckappa > bestKappa:
                    bestKappa, bthreshold, bTree = ckappa, threshold, cTree
        return bestKappa, bTree, model.param2, bthreshold