def createStatObj(self, results=None, exp_pred=None, responseType=None, nExtFolds=None):
    """Assemble the statistics dictionary (statObj) for a set of external folds.

    Args:
        results: Per-fold results. In the classification branch each entry
            ``r`` is read as ``r[0]`` (fold CA) and ``r[1]`` (fold confusion
            matrix, a list of rows). In the regression branch ``r[0]`` is
            stored as the fold RMSE and ``r[1]`` as the fold R2.
        exp_pred: Passed unchanged to ``evalUtilities.calcRsqrt`` and
            ``evalUtilities.calcRMSE`` in the regression branch.
        responseType: ``"Classification"`` selects the classification branch;
            any other non-None value selects the regression branch.
        nExtFolds: Number of folds used as the divisor of the average CA.

    Returns:
        A dict with the keys "CA", "CM", "MCC", "R2", "RMSE",
        "StabilityValue" and "foldStat". Every value is None when any of the
        four arguments is None.
    """
    # Initialize res (statObj) for statistic results
    res = {
        # Classification
        "CA": None,
        "CM": None,
        "MCC": None,
        # Regression
        "R2": None,
        "RMSE": None,
        # Both
        "StabilityValue": None,
        "foldStat": {
            # Regression
            "R2": None,
            "RMSE": None,
            # Classification
            "CM": None,
            "CA": None,
            "MCC": None,
        },
    }
    if results is None or exp_pred is None or responseType is None or nExtFolds is None:
        return res

    # Calculate the (R2, RMSE) or (CM, CA) results depending on
    # Classification or regression
    if responseType == "Classification":
        # Compute CA. The divisor is the nExtFolds argument validated above,
        # not an attribute of self that may be out of sync with it.
        res["CA"] = sum(r[0] for r in results) / nExtFolds

        # Compute CM: sum the fold matrices position by position into a NEW
        # matrix. Summing into results[0][1] itself would corrupt the caller's
        # data and the per-fold CM/MCC stored in foldStat below.
        totalCM = [list(row) for row in results[0][1]]
        for r in results[1:]:
            for Lidx, line in enumerate(r[1]):
                for idx, val in enumerate(line):
                    totalCM[Lidx][idx] += val
        res["CM"] = totalCM

        # Compute MCC
        res["MCC"] = evalUtilities.calcMCC(res["CM"])

        # Compute foldStat
        res["foldStat"]["CA"] = [r[0] for r in results]
        res["foldStat"]["CM"] = [r[1] for r in results]
        res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]

        # Compute Stability
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
    else:
        # Compute R2 and RMSE
        res["R2"] = evalUtilities.calcRsqrt(exp_pred)
        res["RMSE"] = evalUtilities.calcRMSE(exp_pred)

        # Compute foldStat
        res["foldStat"]["RMSE"] = [r[0] for r in results]
        res["foldStat"]["R2"] = [r[1] for r in results]

        # Compute Stability
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["R2"])
    return res
def predict(model, test, fps, fpsTest, label, thrs):
    """Predict the test set and score only the examples that pass the distance check.

    For every example the model is called with ``returnDFV=True``; element 0
    of the result supplies the predicted label (``.value``) and element 1 is
    reported as the probability. ``getDist(fpsTest[idx], fps)`` is compared
    with ``thrs``: examples with ``dist > thrs`` are added to the confusion
    matrix and written to ``predictions_<label>.txt``; all others are counted
    as outside the applicability domain (AD).

    Args:
        model: Callable classifier accepting ``(example, returnDFV=True)``.
        test: Indexable/iterable collection of examples exposing
            ``["BioActivity"].value``.
        fps: Reference fingerprints handed to ``getDist``
            (presumably the train set - confirm against the caller).
        fpsTest: Fingerprints of the test examples, aligned with ``test``.
        label: Suffix used to build the output file name.
        thrs: Threshold applied to the value returned by ``getDist``.

    Returns:
        Tuple ``(MCC, fraction)`` where MCC is rounded to 3 decimals and
        fraction is the share of test examples counted as outside the AD
        (0.0 for an empty test set).
    """
    outPath = "predictions_" + label + ".txt"
    CM = [[0, 0], [0, 0]]
    outAD = 0
    nTest = len(test)

    # Open the report once instead of re-opening it in append mode for every
    # row; the with-block also closes it if a prediction raises.
    with open(outPath, "w") as fid:
        fid.write("Pred\tProb\tActual\tCorrect\n")
        for idx, ex in enumerate(test):
            predList = model(ex, returnDFV=True)
            pred = predList[0].value
            prob = predList[1]
            actual = ex["BioActivity"].value
            correct = bool(pred == actual)

            # Original note: median of the topological distance to the
            # 10 NN in the train set (computed inside getDist).
            dist = getDist(fpsTest[idx], fps)
            if dist > thrs:
                CM = getCM(pred, actual, CM)
                # Single-argument print keeps the Python 2 output text and
                # also parses under Python 3.
                print("pred, prob, actual, correct  %s %s %s %s" % (pred, prob, actual, correct))
                fid.write(pred + "\t" + str(prob) + "\t" + actual + "\t" + str(correct) + "\n")
            else:
                outAD += 1

    print(CM)
    MCC = round(evalUtilities.calcMCC(CM), 3)
    print("MCC of test set  %s" % (MCC,))

    # Avoid ZeroDivisionError when the test set is empty.
    fracOutAD = float(outAD) / nTest if nTest else 0.0
    print("Fraction of outAD in test set  %s" % (fracOutAD,))
    return MCC, fracOutAD
def printTestSetAcc(Model, test, learners, resultsFid, projectName, isRand):
    """Score ``Model`` on ``test`` for the classes "High"/"Low" and log one result row.

    Counts true/false High and Low predictions, prints them together with
    the classification accuracy (CA) and MCC, and writes a tab-separated row
    to ``resultsFid``.

    Args:
        Model: Callable classifier; ``Model(ex)`` is compared with "Low"/"High".
        test: Iterable of examples exposing ``get_class()``.
        learners: Unused; kept so existing callers keep working.
        resultsFid: Writable file-like object receiving the result row.
        projectName: Name used to build the row label.
        isRand: When true the row label is ``NO_<projectName>_randTest``,
            otherwise ``<projectName>_test``.

    Returns:
        The MCC of the confusion matrix, rounded to 3 decimals.
    """
    TL = TH = FL = FH = 0
    for ex in test:
        pred = Model(ex)
        actual = ex.get_class()
        # Examples whose actual or predicted class is neither "High" nor
        # "Low" are not counted at all.
        if actual == "Low":
            if pred == "Low":
                TL += 1
            elif pred == "High":
                FH += 1
        elif actual == "High":
            if pred == "High":
                TH += 1
            elif pred == "Low":
                FL += 1

    print("TH, TL, FH, FL")
    print("%s %s %s %s" % (TH, TL, FH, FL))

    CM = [[TH, FL], [FH, TL]]
    nCounted = TH + TL + FH + FL
    # Guard against ZeroDivisionError when nothing was counted
    # (empty test set or no High/Low labels).
    CA = round(float(TH + TL) / nCounted, 3) if nCounted else 0.0
    print(CA)
    MCC = round(evalUtilities.calcMCC(CM), 3)
    print("MCC  %s" % (MCC,))

    if isRand:
        rowLabel = "NO_" + projectName + "_randTest"
    else:
        rowLabel = projectName + "_test"
    fields = [rowLabel, str(TH), str(TL), str(FH), str(FL), str(CA), str(MCC)]
    resultsFid.write("\t".join(fields) + "\n")

    # MCC is already rounded to 3 decimals above.
    return MCC
def createStatObj(
    self,
    results=None,
    exp_pred=None,
    nTrainCmpds=None,
    nTestCmpds=None,
    responseType=None,
    nExtFolds=None,
    userAlert="",
    rocs=None,
):
    """Assemble the statistics dictionary (statObj), including ROC and stability flag.

    Args:
        results: Per-fold results. In the classification branch each entry
            ``r`` is read as ``r[0]`` (fold CA) and ``r[1]`` (fold confusion
            matrix). In the regression branch ``r[0]`` is stored as the fold
            RMSE and ``r[1]`` as the fold Q2. None or empty returns the
            unfilled statObj.
        exp_pred: Passed unchanged to ``evalUtilities.calcRsqrt`` and
            ``evalUtilities.calcRMSE`` in the regression branch.
        nTrainCmpds: Iterable with the per-fold number of train compounds.
        nTestCmpds: Iterable with the per-fold number of test compounds; its
            mean selects which stability threshold is used.
        responseType: ``"Classification"`` selects the classification branch;
            any other value selects the regression branch.
        nExtFolds: Number of folds used as the divisor of the averaged CA
            and ROC.
        userAlert: Stored unchanged under "userAlert".
        rocs: Optional per-fold ROC entries; ``ro[0]`` of each entry is
            averaged. When None, the ROC fields stay None.

    Returns:
        The statObj dict. "stable" is True only when the stability value is
        below the threshold chosen from the ``AZOC`` constants.
    """
    # Initialize res (statObj) for statistic results
    res = {
        # Classification
        "CA": None,
        "CM": None,
        "MCC": None,
        "ROC": None,
        # Regression
        "Q2": None,
        "RMSE": None,
        # Both
        "StabilityValue": None,
        "userAlert": userAlert,
        "selected": False,
        "stable": False,
        "responseType": False,
        "foldStat": {
            "nTrainCmpds": None,
            "nTestCmpds": None,
            # Regression
            "Q2": None,
            "RMSE": None,
            # Classification
            "CM": None,
            "CA": None,
            "MCC": None,
            "ROC": None,
        },
    }
    self.__log("Starting to create Stat Obj")

    # An empty list is treated like None: there is no fold to aggregate and
    # results[0] below would raise IndexError.
    if not results:
        self.__log("       NONE...")
        return res

    res["responseType"] = responseType
    foldStat = res["foldStat"]

    # Calculate the (Q2, RMSE) or (CM, CA) results depending on
    # Classification or regression
    if responseType == "Classification":
        # Compute CA
        res["CA"] = sum(r[0] for r in results) / nExtFolds

        # Compute CM: sum every fold matrix into a copy of the first one
        res["CM"] = copy.deepcopy(results[0][1])
        for r in results[1:]:
            for Lidx, line in enumerate(r[1]):
                for idx, val in enumerate(line):
                    res["CM"][Lidx][idx] += val

        # Compute MCC
        res["MCC"] = evalUtilities.calcMCC(res["CM"])

        # Compute ROC. rocs is optional (defaults to None), so only average
        # when it was supplied; use the same nExtFolds divisor as CA.
        if rocs is not None:
            res["ROC"] = sum(ro[0] for ro in rocs) / nExtFolds

        # Compute foldStat
        foldStat["nTrainCmpds"] = list(nTrainCmpds)
        foldStat["nTestCmpds"] = list(nTestCmpds)
        foldStat["CA"] = [r[0] for r in results]
        foldStat["CM"] = [r[1] for r in results]
        foldStat["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
        if rocs is not None:
            foldStat["ROC"] = list(rocs)

        # Compute Stability
        res["StabilityValue"] = evalUtilities.stability(foldStat["CA"])
    else:
        # Compute Q2 and RMSE
        res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
        res["RMSE"] = evalUtilities.calcRMSE(exp_pred)

        # Compute foldStat
        foldStat["nTrainCmpds"] = list(nTrainCmpds)
        foldStat["nTestCmpds"] = list(nTestCmpds)
        foldStat["RMSE"] = [r[0] for r in results]
        foldStat["Q2"] = [r[1] for r in results]

        # Compute Stability value
        res["StabilityValue"] = evalUtilities.stability(foldStat["Q2"])

    # Evaluate stability of ML
    StabilityValue = res["StabilityValue"]
    if StabilityValue is not None:
        # Folds averaging more than 50 test compounds use the *_L threshold,
        # the others the *_H threshold.
        largeTestSets = statc.mean(foldStat["nTestCmpds"]) > 50
        if responseType == "Classification":
            if largeTestSets:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
        else:
            if largeTestSets:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
        if StabilityValue < stableTH:  # Select only stable models
            res["stable"] = True

    return res
def createStatObj(results=None,
                  exp_pred=None,
                  nTrainCmpds=None,
                  nTestCmpds=None,
                  responseType=None,
                  nExtFolds=None,
                  userAlert="",
                  foldSelectedML=None):
    """Build the statObj dictionary summarising the per-fold results.

    ``results`` holds one entry per fold: for classification ``entry[0]`` is
    the fold CA and ``entry[1]`` the fold confusion matrix; for regression
    ``entry[0]`` is stored as the fold RMSE and ``entry[1]`` as the fold Q2.
    ``exp_pred`` is handed to ``evalUtilities.calcRsqrt``/``calcRMSE`` in the
    regression branch. When ``results`` is empty or any of ``results``,
    ``exp_pred``, ``responseType``, ``nExtFolds``, ``nTestCmpds`` or
    ``nTrainCmpds`` is None, the unfilled statObj is returned. ``"stable"``
    becomes True when the stability value is below the threshold selected
    from the ``AZOC`` constants.
    """
    perFold = {
        "nTrainCmpds": None,
        "nTestCmpds": None,
        # Regression
        "Q2": None,
        "RMSE": None,
        # Classification
        "CM": None,
        "CA": None,
        "MCC": None,
    }
    statObj = {
        # Classification
        "CA": None,
        "CM": None,
        "MCC": None,
        # Regression
        "Q2": None,
        "RMSE": None,
        # Both
        "StabilityValue": None,
        "userAlert": userAlert,
        "selected": False,
        "stable": False,
        "responseType": False,
        "foldStat": perFold,
    }

    # Nothing to aggregate unless every required input is present.
    if not results:
        return statObj
    required = (exp_pred, responseType, nExtFolds, nTestCmpds, nTrainCmpds)
    if any(arg is None for arg in required):
        return statObj

    statObj["responseType"] = responseType
    isClassification = responseType == "Classification"

    if isClassification:
        # Average CA over the folds
        statObj["CA"] = sum(fold[0] for fold in results) / nExtFolds

        # Overall CM: copy of the first fold matrix plus all the others,
        # added position by position
        confMat = copy.deepcopy(results[0][1])
        for fold in results[1:]:
            for rowIdx, row in enumerate(fold[1]):
                for colIdx, count in enumerate(row):
                    confMat[rowIdx][colIdx] = confMat[rowIdx][colIdx] + count
        statObj["CM"] = confMat

        # Overall MCC from the summed matrix
        statObj["MCC"] = evalUtilities.calcMCC(confMat)

        # Per-fold statistics
        perFold["nTrainCmpds"] = list(nTrainCmpds)
        perFold["nTestCmpds"] = list(nTestCmpds)
        perFold["CA"] = [fold[0] for fold in results]
        perFold["CM"] = [fold[1] for fold in results]
        perFold["MCC"] = [evalUtilities.calcMCC(fold[1]) for fold in results]

        # Stability is derived from the per-fold CA
        statObj["StabilityValue"] = evalUtilities.stability(perFold["CA"])
    else:
        # Overall Q2 and RMSE
        statObj["Q2"] = evalUtilities.calcRsqrt(exp_pred)
        statObj["RMSE"] = evalUtilities.calcRMSE(exp_pred)

        # Per-fold statistics
        perFold["nTrainCmpds"] = list(nTrainCmpds)
        perFold["nTestCmpds"] = list(nTestCmpds)
        perFold["RMSE"] = [fold[0] for fold in results]
        perFold["Q2"] = [fold[1] for fold in results]

        # Stability is derived from the per-fold Q2
        statObj["StabilityValue"] = evalUtilities.stability(perFold["Q2"])

    # Keep the selected MLs when they were passed
    if foldSelectedML:
        perFold["foldSelectedML"] = list(foldSelectedML)

    # Decide whether the model counts as stable
    stability = statObj["StabilityValue"]
    if stability is None:
        return statObj

    manyTestCmpds = statc.mean(perFold["nTestCmpds"]) > 50
    if isClassification:
        if manyTestCmpds:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
        else:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
    elif manyTestCmpds:
        threshold = AZOC.QSARSTABILITYTHRESHOLD_REG_L
    else:
        threshold = AZOC.QSARSTABILITYTHRESHOLD_REG_H

    # Only models below the threshold are flagged stable
    if stability < threshold:
        statObj["stable"] = True
    return statObj
data = dataUtilities.attributeDeselectionData(data, descList) print "Length domain ", len(data.domain) learner = AZorngCvSVM.CvSVMLearner(C=32, gamma=0.03125) #learner = AZorngRF.RFLearner() #learner = AZorngRF.RFLearner(stratify = "Yes") # No effect #learner = AZorngCvBoost.CvBoostLearner() #learner.stratify = "Yes" # No effect #learner.priors = {"Active":0.80, "Inactive":0.20} # Test set accuracy model = learner(data) res = orngTest.testOnData([model], data) CM = evalUtilities.ConfMat(res)[0] CA = round(orngStat.CA(res)[0], 3) MCC = round(evalUtilities.calcMCC(CM), 3) # TH, FL, FH, TL resList = [ str(CM[0][0]), str(CM[0][1]), str(CM[1][0]), str(CM[1][1]), str(CA), str(MCC) ] wrtStr = string.join(resList, "\t") print "nonIID test set results" print wrtStr # CV accuracy res = orngTest.crossValidation(