def get_best_kappa4(self, NE, yPredR):
    """Grid-search four ordered thresholds that maximise kappa for scores 0-4.

    Candidate thresholds are the values ``k / NE``.  The four integer
    numerators are strictly increasing; the first two stay below ``2 * NE``,
    the third below ``3 * NE`` and the fourth below ``4 * NE``.  Every
    prediction is mapped to the number of thresholds it reaches
    (``yPredR >= threshold``), i.e. an integer from 0 to 4, and that
    labelling is scored against ``self.y`` with ``skllMetrics.kappa``.

    Args:
        NE: number of grid steps per unit of the prediction scale.
        yPredR: raw predictions, compared elementwise with each threshold
            (presumably a NumPy array -- confirm against callers).

    Returns:
        ``(bestKappa, [threshold1, threshold2, threshold3, threshold4])``.
        Only a kappa strictly greater than 0 is recorded, so the result is
        ``(0, [0, 0, 0, 0])`` when no candidate beats 0 or when ``yPredR``
        is empty.
    """
    bestKappa = 0
    bestThresholds = [0, 0, 0, 0]

    # Nothing to threshold: return the "no candidate found" result instead
    # of letting np.min / np.max raise on an empty array.
    if np.size(yPredR) == 0:
        return bestKappa, bestThresholds

    # Loop invariants, computed once instead of once per innermost iteration.
    lowest = np.min(yPredR)
    highest = np.max(yPredR)
    scale = NE * 1.0

    for thresholdV1 in range(1, 2 * NE):
        threshold1 = thresholdV1 / scale
        # Candidates whose first threshold lies below every prediction are
        # not evaluated; decide that here rather than in the innermost loop.
        if threshold1 < lowest:
            continue
        reached1 = (yPredR >= threshold1) * 1

        for thresholdV2 in range(thresholdV1 + 1, 2 * NE):
            threshold2 = thresholdV2 / scale
            # threshold2 is the smallest of the last three thresholds, so
            # it alone decides whether all of them exceed every prediction.
            if threshold2 > highest:
                continue
            reached2 = (yPredR >= threshold2) * 1 + reached1

            for thresholdV3 in range(thresholdV2 + 1, 3 * NE):
                threshold3 = thresholdV3 / scale
                reached3 = (yPredR >= threshold3) * 1 + reached2

                for thresholdV4 in range(thresholdV3 + 1, 4 * NE):
                    threshold4 = thresholdV4 / scale
                    yPred = (yPredR >= threshold4) * 1 + reached3
                    cKappa = skllMetrics.kappa(self.y, yPred)
                    # Strict comparison: the first best candidate wins ties.
                    if cKappa > bestKappa:
                        bestKappa = cKappa
                        bestThresholds = [threshold1, threshold2,
                                          threshold3, threshold4]

    return bestKappa, bestThresholds
def get_best_kappa4(self, NE, yPredR):
    """Return the four cut-offs giving the highest kappa for scores 0 to 4.

    The search walks a grid of cut-offs ``k / NE`` with strictly increasing
    integer numerators ``k1 < k2 < k3 < k4`` (``k1`` and ``k2`` below
    ``2 * NE``, ``k3`` below ``3 * NE``, ``k4`` below ``4 * NE``).  A
    prediction's score is the count of cut-offs it is greater than or equal
    to; the scored vector is compared with ``self.y`` through
    ``skllMetrics.kappa``.

    Args:
        NE: grid resolution; cut-offs are multiples of ``1 / NE``.
        yPredR: raw predictions, compared elementwise against the cut-offs
            (presumably a NumPy array -- confirm against callers).

    Returns:
        A tuple ``(bestKappa, [c1, c2, c3, c4])``.  The starting best kappa
        is 0 and only strictly larger values replace it, so
        ``(0, [0, 0, 0, 0])`` comes back when nothing beats 0 or when
        ``yPredR`` has no elements.
    """
    best = (0, [0, 0, 0, 0])

    # An empty prediction array has no min/max; np.min would raise, so
    # report the default result up front.
    if np.size(yPredR) == 0:
        return best

    # The prediction range and the float divisor never change while
    # searching, so compute them a single time.
    predMin = np.min(yPredR)
    predMax = np.max(yPredR)
    unit = NE * 1.0

    for k1 in range(1, 2 * NE):
        cut1 = k1 / unit
        if cut1 < predMin:
            # First cut-off below every prediction: candidate is skipped.
            continue
        score1 = (yPredR >= cut1) * 1

        for k2 in range(k1 + 1, 2 * NE):
            cut2 = k2 / unit
            if cut2 > predMax:
                # cut2 <= cut3 <= cut4, so all three lie above every
                # prediction: candidate is skipped.
                continue
            score2 = score1 + (yPredR >= cut2) * 1

            for k3 in range(k2 + 1, 3 * NE):
                cut3 = k3 / unit
                score3 = score2 + (yPredR >= cut3) * 1

                for k4 in range(k3 + 1, 4 * NE):
                    cut4 = k4 / unit
                    scores = score3 + (yPredR >= cut4) * 1
                    kappa = skllMetrics.kappa(self.y, scores)
                    # Strictly-greater keeps the earliest of equal maxima.
                    if kappa > best[0]:
                        best = (kappa, [cut1, cut2, cut3, cut4])

    return best
def evaluateCl1(self, model, SVM=False):
    """Pick the probability cut-off with the highest kappa for 0/1 scores.

    The values returned by ``self.cv_estimateCl1(model, SVM)`` are
    binarised with ``> cutoff`` for every cutoff in 0.01, 0.02, ..., 0.99
    and each labelling is scored against ``self.y`` with
    ``skllMetrics.kappa``.

    Args:
        model: Logistic Regression or SVM model wrapper; its ``param1`` and
            ``param2`` attributes are echoed back in the result.
        SVM: forwarded unchanged to ``self.cv_estimateCl1``.

    Returns:
        ``(bestKappa, model.param1, model.param2, bestCutoff)``.  If no
        cutoff yields a kappa above 0 the result carries kappa 0 and the
        default cutoff 0.5.
    """
    estimates = self.cv_estimateCl1(model, SVM)

    topKappa = 0
    topCutoff = 0.5
    for cutoff in (step / 100.0 for step in range(1, 100)):
        score = skllMetrics.kappa(self.y, estimates > cutoff)
        # Strict comparison: the lowest cutoff wins among equal kappas.
        if score > topKappa:
            topKappa, topCutoff = score, cutoff

    return topKappa, model.param1, model.param2, topCutoff
def evaluateCl1(self, model, SVM=False):
    """Evaluate a Logistic Regression / SVM model when scores are 0 or 1.

    Obtains estimates from ``self.cv_estimateCl1(model, SVM)``, then tries
    the 99 thresholds ``1/100 ... 99/100``.  For each one the estimates are
    turned into booleans with ``estimate > threshold`` and compared with
    ``self.y`` via ``skllMetrics.kappa``.

    Args:
        model: model wrapper exposing ``param1`` and ``param2``.
        SVM: flag passed straight through to ``self.cv_estimateCl1``.

    Returns:
        Tuple ``(kappa, model.param1, model.param2, threshold)`` for the
        best threshold found; ``(0, param1, param2, 0.5)`` when no threshold
        produces a kappa greater than 0.
    """
    cvEstimates = self.cv_estimateCl1(model, SVM)
    winner = {"kappa": 0, "threshold": 0.5}

    numerator = 1
    while numerator < 100:
        candidate = numerator / 100.0
        labels = cvEstimates > candidate
        kappaHere = skllMetrics.kappa(self.y, labels)
        # Only a strictly better kappa replaces the current winner.
        if kappaHere > winner["kappa"]:
            winner["kappa"] = kappaHere
            winner["threshold"] = candidate
        numerator += 1

    return winner["kappa"], model.param1, model.param2, winner["threshold"]
def evaluateGB1(self, model, nTree, fixed=False):
    """Find the tree count and cut-off with the highest kappa (scores 0/1).

    ``self.cv_estimateGB1(model, nTree)`` returns a sequence that is indexed
    here by ``candidate - 1`` (presumably one set of cross-validated
    estimates per boosting stage -- confirm in ``cv_estimateGB1``).  For
    every candidate the estimates are binarised with ``> cutoff`` and scored
    against ``self.y`` with ``skllMetrics.kappa``.

    Args:
        model: Gradient Boosting model wrapper; ``model.param2`` is echoed
            back in the result.
        nTree: forwarded to ``self.cv_estimateGB1``; when ``fixed`` is true
            it also defines the single candidate ``nTree - 1``.
        fixed: if true, evaluate only the candidate ``nTree - 1`` (sequence
            index ``nTree - 2``); otherwise evaluate a fixed list of
            candidates from 20 to 1500.

    Returns:
        ``(bestKappa, bestIndex, model.param2, bestCutoff)`` where
        ``bestIndex`` is the sequence index (candidate minus one).  All
        three "best" values stay 0 when no kappa exceeds 0.
    """
    stagedEstimates = self.cv_estimateGB1(model, nTree)

    if fixed:
        candidates = [nTree - 1]
    else:
        candidates = [20, 30, 50, 100, 150, 200, 250, 300, 350, 400, 450,
                      500, 550, 600, 750, 1000, 1250, 1500]

    topKappa, topCutoff, topIndex = 0, 0, 0
    for candidate in candidates:
        index = candidate - 1
        estimates = stagedEstimates[index]
        # NOTE(review): the cut-offs only span 0.01-0.19 (range(1, 20)
        # divided by 100) -- confirm this narrow grid is intentional.
        for step in range(1, 20):
            cutoff = step / 100.0
            score = skllMetrics.kappa(self.y, estimates > cutoff)
            # Strictly-greater keeps the first of equally good settings.
            if score > topKappa:
                topKappa, topCutoff, topIndex = score, cutoff, index

    return topKappa, topIndex, model.param2, topCutoff
def get_best_kappa2(self, NE, yPredR):
    """Grid-search two ordered thresholds that maximise kappa for scores 0-2.

    Candidate thresholds are ``k / NE`` with integer numerators
    ``1 <= k1 < k2 < 2 * NE``.  Each prediction is mapped to the number of
    thresholds it reaches (``yPredR >= threshold``), i.e. 0, 1 or 2, and the
    result is scored against ``self.y`` with ``skllMetrics.kappa``.

    Args:
        NE: number of grid steps per unit of the prediction scale.
        yPredR: raw predictions, compared elementwise with each threshold
            (presumably a NumPy array -- confirm against callers).

    Returns:
        ``(bestKappa, threshold1, threshold2)``.  Only a kappa strictly
        greater than 0 is recorded, so ``(0, 0, 0)`` is returned when no
        candidate beats 0 or when ``yPredR`` is empty.
    """
    bestKappa, bthreshold1, bthreshold2 = 0, 0, 0

    # Nothing to threshold: return the "no candidate found" result instead
    # of letting np.min / np.max raise on an empty array.
    if np.size(yPredR) == 0:
        return bestKappa, bthreshold1, bthreshold2

    # Loop invariants, computed once instead of once per inner iteration.
    lowest = np.min(yPredR)
    highest = np.max(yPredR)
    scale = NE * 1.0

    for thresholdV1 in range(1, 2 * NE):
        threshold1 = thresholdV1 / scale
        # Candidates whose first threshold lies below every prediction are
        # not evaluated; decide that before entering the inner loop.
        if threshold1 < lowest:
            continue
        reached1 = (yPredR >= threshold1) * 1

        for thresholdV2 in range(thresholdV1 + 1, 2 * NE):
            threshold2 = thresholdV2 / scale
            # A second threshold above every prediction is not evaluated.
            if threshold2 > highest:
                continue
            yPred = (yPredR >= threshold2) * 1 + reached1
            cKappa = skllMetrics.kappa(self.y, yPred)
            # Strict comparison: the first best candidate wins ties.
            if cKappa > bestKappa:
                bestKappa, bthreshold1, bthreshold2 = (
                    cKappa, threshold1, threshold2)

    return bestKappa, bthreshold1, bthreshold2
def get_best_kappa2(self, NE, yPredR):
    """Return the two cut-offs giving the highest kappa for scores 0 to 2.

    The search walks a grid of cut-offs ``k / NE`` with integer numerators
    ``1 <= k1 < k2 < 2 * NE``.  A prediction's score is the count of
    cut-offs it is greater than or equal to; the scored vector is compared
    with ``self.y`` through ``skllMetrics.kappa``.

    Args:
        NE: grid resolution; cut-offs are multiples of ``1 / NE``.
        yPredR: raw predictions, compared elementwise against the cut-offs
            (presumably a NumPy array -- confirm against callers).

    Returns:
        A tuple ``(bestKappa, cut1, cut2)``.  The starting best kappa is 0
        and only strictly larger values replace it, so ``(0, 0, 0)`` comes
        back when nothing beats 0 or when ``yPredR`` has no elements.
    """
    best = (0, 0, 0)

    # An empty prediction array has no min/max; np.min would raise, so
    # report the default result up front.
    if np.size(yPredR) == 0:
        return best

    # The prediction range and the float divisor never change while
    # searching, so compute them a single time.
    predMin = np.min(yPredR)
    predMax = np.max(yPredR)
    unit = NE * 1.0

    for k1 in range(1, 2 * NE):
        cut1 = k1 / unit
        if cut1 < predMin:
            # First cut-off below every prediction: candidate is skipped.
            continue
        score1 = (yPredR >= cut1) * 1

        for k2 in range(k1 + 1, 2 * NE):
            cut2 = k2 / unit
            if cut2 > predMax:
                # Second cut-off above every prediction: candidate skipped.
                continue
            scores = score1 + (yPredR >= cut2) * 1
            kappa = skllMetrics.kappa(self.y, scores)
            # Strictly-greater keeps the earliest of equal maxima.
            if kappa > best[0]:
                best = (kappa, cut1, cut2)

    return best
def evaluateCl2(self, model, SVM=False):
    """Pick the pair of cut-offs with the highest kappa for scores 0 to 2.

    With ``SVM`` set, the search is delegated to ``self.evaluateSV2(model)``
    and its three results are returned together with the model parameters.

    Otherwise ``self.cv_estimateCl2(model, SVM)`` supplies two estimate
    arrays.  Every pair of cut-offs from 0.05, 0.10, ..., 0.95 is tried: the
    first array marks score 1 where it exceeds its cut-off, the second marks
    score 2 where it exceeds its cut-off, and the elementwise maximum of the
    two is scored against ``self.y`` with ``skllMetrics.kappa``.

    Args:
        model: Logistic Regression or SVM model wrapper; its ``param1`` and
            ``param2`` attributes are echoed back in the result.
        SVM: selects the ``evaluateSV2`` path when true.

    Returns:
        ``(bestKappa, model.param1, model.param2, cutoff1, cutoff2)``.  On
        the non-SVM path the cut-offs default to 0.5 and the kappa to 0 when
        no pair yields a kappa above 0.
    """
    if SVM:
        svmKappa, svmCutoff1, svmCutoff2 = self.evaluateSV2(model)
        return svmKappa, model.param1, model.param2, svmCutoff1, svmCutoff2

    NE = 20
    grid = [step / (NE * 1.0) for step in range(1, NE)]
    estimates1, estimates2 = self.cv_estimateCl2(model, SVM)

    topKappa, topCutoff1, topCutoff2 = 0, 0.5, 0.5
    for cutoff1 in grid:
        for cutoff2 in grid:
            asOne = (estimates1 > cutoff1) * 1
            asTwo = (estimates2 > cutoff2) * 2
            # Score 2 takes precedence over score 1 where both fire.
            score = skllMetrics.kappa(self.y, np.maximum(asOne, asTwo))
            if score > topKappa:
                topKappa, topCutoff1, topCutoff2 = score, cutoff1, cutoff2

    return topKappa, model.param1, model.param2, topCutoff1, topCutoff2
def evaluateCl2(self, model, SVM=False):
    """Evaluate a Logistic Regression / SVM model when scores range 0 to 2.

    SVM models are handed to ``self.evaluateSV2(model)``, whose kappa and
    two thresholds are returned alongside ``model.param1`` and
    ``model.param2``.

    For other models, ``self.cv_estimateCl2(model, SVM)`` returns two
    estimate arrays.  A 19 x 19 grid of thresholds (``1/20 ... 19/20`` for
    each array) is scanned; a prediction is 2 where the second array exceeds
    its threshold, else 1 where the first array exceeds its threshold, else
    0.  Each labelling is compared with ``self.y`` via
    ``skllMetrics.kappa``.

    Args:
        model: model wrapper exposing ``param1`` and ``param2``.
        SVM: when true, use the ``evaluateSV2`` path.

    Returns:
        Tuple ``(kappa, model.param1, model.param2, threshold1,
        threshold2)``.  Without any kappa above 0 on the grid path, the
        result is ``(0, param1, param2, 0.5, 0.5)``.
    """
    NE = 20

    if not SVM:
        first, second = self.cv_estimateCl2(model, SVM)
        winner = (0, 0.5, 0.5)
        for n1 in range(1, NE):
            for n2 in range(1, NE):
                level1 = n1 / (NE * 1.0)
                level2 = n2 / (NE * 1.0)
                labels = np.maximum((first > level1) * 1,
                                    (second > level2) * 2)
                kappaHere = skllMetrics.kappa(self.y, labels)
                # Only a strictly better kappa replaces the winner.
                if kappaHere > winner[0]:
                    winner = (kappaHere, level1, level2)
        kappaOut, level1Out, level2Out = winner
        return kappaOut, model.param1, model.param2, level1Out, level2Out

    kappaOut, level1Out, level2Out = self.evaluateSV2(model)
    return kappaOut, model.param1, model.param2, level1Out, level2Out
def evaluateGB1(self, model, nTree, fixed=False):
    """Evaluate a Gradient Boosting model when scores are 0 or 1.

    The sequence returned by ``self.cv_estimateGB1(model, nTree)`` is read
    at index ``candidate - 1`` for each candidate tree count (presumably
    per-stage cross-validated estimates -- confirm in ``cv_estimateGB1``).
    Each entry is thresholded with ``> threshold`` and compared with
    ``self.y`` via ``skllMetrics.kappa``.

    Args:
        model: model wrapper; only ``model.param2`` is read, for the result.
        nTree: passed to ``self.cv_estimateGB1``; with ``fixed`` it also
            gives the only candidate, ``nTree - 1``.
        fixed: true to test just ``nTree - 1`` (index ``nTree - 2``), false
            to test the built-in list of candidates from 20 up to 1500.

    Returns:
        Tuple ``(kappa, index, model.param2, threshold)`` of the best
        setting, where ``index`` is the candidate minus one.  Kappa, index
        and threshold are all 0 if no setting scores above 0.
    """
    perStage = self.cv_estimateGB1(model, nTree)

    defaultCounts = [20, 30, 50, 100, 150, 200, 250, 300, 350, 400, 450,
                     500, 550, 600, 750, 1000, 1250, 1500]
    counts = [nTree - 1] if fixed else defaultCounts

    winner = (0, 0, 0)  # (kappa, threshold, index)
    for count in counts:
        stage = count - 1
        stageEstimates = perStage[stage]
        # NOTE(review): thresholds cover only 0.01-0.19 (range(1, 20) over
        # 100) -- verify that this narrow range is what is wanted.
        for numerator in range(1, 20):
            level = numerator / 100.0
            labels = stageEstimates > level
            kappaHere = skllMetrics.kappa(self.y, labels)
            # Only a strictly better kappa replaces the winner.
            if kappaHere > winner[0]:
                winner = (kappaHere, level, stage)

    kappaOut, levelOut, stageOut = winner
    return kappaOut, stageOut, model.param2, levelOut