def createStatObj(self, results=None, exp_pred=None, responseType=None, nExtFolds=None):
    """Build the statistics dictionary (statObj) from per-fold results.

    Called without arguments (or with any of them set to None) it returns
    an "empty" statObj in which every statistic is None.

    Parameters:
        results      -- one entry per fold. For classification each entry is
                        indexed as (CA, confusion matrix); for regression as
                        (RMSE, R2).
        exp_pred     -- passed untouched to evalUtilities.calcRsqrt and
                        evalUtilities.calcRMSE (regression only).
                        NOTE(review): presumably experimental/predicted
                        pairs -- confirm against evalUtilities.
        responseType -- "Classification" selects the classification branch;
                        any other value is handled as regression.
        nExtFolds    -- number of folds, used as the divisor of the mean CA.

    Returns:
        dict with keys "CA", "CM", "MCC", "R2", "RMSE", "StabilityValue"
        and "foldStat" (the per-fold values of the same statistics).
    """
    # Local import so this function does not rely on a module-level import.
    import copy

    # Initialize res (statObj) for statistic results
    res = {}
    # Classification
    res["CA"] = None
    res["CM"] = None
    res["MCC"] = None
    # Regression
    res["R2"] = None
    res["RMSE"] = None
    # Both
    res["StabilityValue"] = None
    res["foldStat"] = {
        # Regression
        "R2": None,
        "RMSE": None,
        # Classification
        "CM": None,
        "CA": None,
        "MCC": None,
    }
    if results is None or exp_pred is None or responseType is None or nExtFolds is None:
        return res

    # Calculate the (R2, RMSE) or (CM, CA) results depending on
    # Classification or regression
    if responseType == "Classification":
        # Compute CA. Divide by the nExtFolds argument (already validated
        # above) rather than by unrelated instance state.
        res["CA"] = sum(r[0] for r in results) / nExtFolds
        # Compute CM. Start from a *copy* of the first fold's matrix: summing
        # into the original object would corrupt results[0][1] and, through
        # it, the per-fold CM and MCC stored below.
        res["CM"] = copy.deepcopy(results[0][1])
        for r in results[1:]:
            for Lidx, line in enumerate(r[1]):
                for idx, val in enumerate(line):
                    # Add each same ConfMat position
                    res["CM"][Lidx][idx] = res["CM"][Lidx][idx] + val
        # Compute MCC on the pooled confusion matrix
        res["MCC"] = evalUtilities.calcMCC(res["CM"])
        # Compute foldStat
        res["foldStat"]["CA"] = [r[0] for r in results]
        res["foldStat"]["CM"] = [r[1] for r in results]
        res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
        # Compute Stability from the per-fold CA values
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
    else:
        # Compute R2
        res["R2"] = evalUtilities.calcRsqrt(exp_pred)
        # Compute RMSE
        res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
        # Compute foldStat
        res["foldStat"]["RMSE"] = [r[0] for r in results]
        res["foldStat"]["R2"] = [r[1] for r in results]
        # Compute Stability from the per-fold R2 values
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["R2"])
    return res
def createStatObj(
    self,
    results=None,
    exp_pred=None,
    nTrainCmpds=None,
    nTestCmpds=None,
    responseType=None,
    nExtFolds=None,
    userAlert="",
    rocs=None,
):
    """Build the statistics dictionary (statObj) from per-fold results.

    When results is None an "empty" statObj is returned in which every
    statistic is None; only userAlert is filled in.

    Parameters:
        results      -- one entry per fold. For classification each entry is
                        indexed as (CA, confusion matrix); for regression as
                        (RMSE, Q2).
        exp_pred     -- passed untouched to evalUtilities.calcRsqrt and
                        evalUtilities.calcRMSE (regression only).
                        NOTE(review): presumably experimental/predicted
                        pairs -- confirm against evalUtilities.
        nTrainCmpds  -- per-fold values, copied into foldStat["nTrainCmpds"].
        nTestCmpds   -- per-fold values, copied into foldStat["nTestCmpds"];
                        their mean selects the stability threshold.
        responseType -- "Classification" selects the classification branch;
                        any other value is handled as regression.
        nExtFolds    -- number of folds, divisor of the mean CA and mean ROC.
        userAlert    -- stored as-is under res["userAlert"].
        rocs         -- optional per-fold ROC entries (classification only);
                        element [0] of each entry is averaged. When omitted
                        the ROC fields stay None.

    Returns:
        dict with the overall statistics, the flags "selected" (always False
        here) and "stable", the "responseType", and "foldStat" holding the
        per-fold values.
    """
    # Initialize res (statObj) for statistic results
    res = {}
    self.__log("Starting to create Stat Obj")
    # Classification
    res["CA"] = None
    res["CM"] = None
    res["MCC"] = None
    res["ROC"] = None
    # Regression
    res["Q2"] = None
    res["RMSE"] = None
    # Both
    res["StabilityValue"] = None
    res["userAlert"] = userAlert
    res["selected"] = False
    res["stable"] = False
    res["responseType"] = False
    res["foldStat"] = {
        "nTrainCmpds": None,
        "nTestCmpds": None,
        # Regression
        "Q2": None,
        "RMSE": None,
        # Classification
        "CM": None,
        "CA": None,
        "MCC": None,
        "ROC": None,
    }
    # Only results is checked here; the remaining arguments are expected to
    # be supplied by the caller whenever results is given.
    if results is None:
        self.__log(" NONE...")
        return res

    res["responseType"] = responseType
    # Calculate the (Q2, RMSE) or (CM, CA) results depending on
    # Classification or regression
    if responseType == "Classification":
        # Compute CA
        res["CA"] = sum(r[0] for r in results) / nExtFolds
        # Compute CM: sum the fold matrices into a copy of the first one so
        # the per-fold matrices in results stay untouched.
        res["CM"] = copy.deepcopy(results[0][1])
        for r in results[1:]:
            for Lidx, line in enumerate(r[1]):
                for idx, val in enumerate(line):
                    # Add each same ConfMat position
                    res["CM"][Lidx][idx] = res["CM"][Lidx][idx] + val
        # Compute MCC on the pooled confusion matrix
        res["MCC"] = evalUtilities.calcMCC(res["CM"])
        # Compute ROC. rocs is optional (defaults to None), so skip it when
        # absent instead of failing, and average with the same nExtFolds
        # divisor that CA uses.
        if rocs is not None:
            res["ROC"] = sum(ro[0] for ro in rocs) / nExtFolds
        # Compute foldStat
        res["foldStat"]["nTrainCmpds"] = list(nTrainCmpds)
        res["foldStat"]["nTestCmpds"] = list(nTestCmpds)
        res["foldStat"]["CA"] = [r[0] for r in results]
        res["foldStat"]["CM"] = [r[1] for r in results]
        res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
        if rocs is not None:
            res["foldStat"]["ROC"] = list(rocs)
        # Compute Stability from the per-fold CA values
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
    else:
        # Compute Q2
        res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
        # Compute RMSE
        res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
        # Compute foldStat
        res["foldStat"]["nTrainCmpds"] = list(nTrainCmpds)
        res["foldStat"]["nTestCmpds"] = list(nTestCmpds)
        res["foldStat"]["RMSE"] = [r[0] for r in results]
        res["foldStat"]["Q2"] = [r[1] for r in results]
        # Compute Stability value from the per-fold Q2 values
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["Q2"])

    # Evaluate stability of ML
    StabilityValue = res["StabilityValue"]
    if StabilityValue is not None:
        # The threshold depends on the response type and on whether the
        # folds hold, on average, more than 50 test compounds.
        manyTestCmpds = statc.mean(res["foldStat"]["nTestCmpds"]) > 50
        if responseType == "Classification":
            if manyTestCmpds:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
        else:
            if manyTestCmpds:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
        if StabilityValue < stableTH:
            # Select only stable models
            res["stable"] = True
    return res
def createStatObj(results=None, exp_pred=None, nTrainCmpds=None, nTestCmpds=None, responseType=None, nExtFolds=None, userAlert="", foldSelectedML=None):
    """Assemble the statObj dictionary summarising per-fold results.

    An "empty" statObj (all statistics None, only userAlert filled in) is
    returned when results is empty/None or when any of exp_pred,
    responseType, nExtFolds, nTestCmpds or nTrainCmpds is None.

    Parameters:
        results        -- one entry per fold, indexed as (CA, confusion
                          matrix) for classification or (RMSE, Q2) for
                          regression.
        exp_pred       -- handed to evalUtilities.calcRsqrt / calcRMSE in the
                          regression branch.
        nTrainCmpds    -- per-fold values copied to foldStat["nTrainCmpds"].
        nTestCmpds     -- per-fold values copied to foldStat["nTestCmpds"];
                          their mean picks the stability threshold.
        responseType   -- "Classification" or anything else (= regression).
        nExtFolds      -- divisor used for the mean CA.
        userAlert      -- stored unchanged under "userAlert".
        foldSelectedML -- optional; when truthy it is copied to
                          foldStat["foldSelectedML"].

    Returns:
        dict holding the overall statistics, the "selected"/"stable" flags,
        the response type and the per-fold "foldStat" sub-dictionary.
    """
    # Per-fold statistics, filled in further down.
    fold_stats = {
        "nTrainCmpds": None,
        "nTestCmpds": None,
        # Regression
        "Q2": None,
        "RMSE": None,
        # Classification
        "CM": None,
        "CA": None,
        "MCC": None,
    }
    # Overall statistics (statObj).
    res = {
        # Classification
        "CA": None,
        "CM": None,
        "MCC": None,
        # Regression
        "Q2": None,
        "RMSE": None,
        # Both
        "StabilityValue": None,
        "userAlert": userAlert,
        "selected": False,
        "stable": False,
        "responseType": False,
        "foldStat": fold_stats,
    }

    # Nothing to compute unless every input is available.
    mandatory = (exp_pred, responseType, nExtFolds, nTestCmpds, nTrainCmpds)
    if not results or any(arg is None for arg in mandatory):
        return res

    res["responseType"] = responseType
    is_classification = responseType == "Classification"

    if is_classification:
        # Mean classification accuracy over the folds.
        res["CA"] = sum(fold[0] for fold in results) / nExtFolds
        # Pool the fold confusion matrices cell by cell, starting from a
        # copy of the first one so the inputs are left untouched.
        pooled_cm = copy.deepcopy(results[0][1])
        for fold in results[1:]:
            for row_idx, row in enumerate(fold[1]):
                for col_idx, count in enumerate(row):
                    pooled_cm[row_idx][col_idx] = pooled_cm[row_idx][col_idx] + count
        res["CM"] = pooled_cm
        res["MCC"] = evalUtilities.calcMCC(pooled_cm)
        # Per-fold values.
        fold_stats["nTrainCmpds"] = list(nTrainCmpds)
        fold_stats["nTestCmpds"] = list(nTestCmpds)
        fold_stats["CA"] = [fold[0] for fold in results]
        fold_stats["CM"] = [fold[1] for fold in results]
        fold_stats["MCC"] = [evalUtilities.calcMCC(fold[1]) for fold in results]
        # Stability is derived from the per-fold accuracies.
        res["StabilityValue"] = evalUtilities.stability(fold_stats["CA"])
    else:
        res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
        res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
        # Per-fold values.
        fold_stats["nTrainCmpds"] = list(nTrainCmpds)
        fold_stats["nTestCmpds"] = list(nTestCmpds)
        fold_stats["RMSE"] = [fold[0] for fold in results]
        fold_stats["Q2"] = [fold[1] for fold in results]
        # Stability is derived from the per-fold Q2 values.
        res["StabilityValue"] = evalUtilities.stability(fold_stats["Q2"])

    # Save selectedMLs if passed
    if foldSelectedML:
        fold_stats["foldSelectedML"] = list(foldSelectedML)

    # Evaluate stability of ML
    stability = res["StabilityValue"]
    if stability is None:
        return res

    # The threshold depends on the response type and on whether the folds
    # hold, on average, more than 50 test compounds.
    many_test_cmpds = statc.mean(fold_stats["nTestCmpds"]) > 50
    if is_classification:
        if many_test_cmpds:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
        else:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
    elif many_test_cmpds:
        threshold = AZOC.QSARSTABILITYTHRESHOLD_REG_L
    else:
        threshold = AZOC.QSARSTABILITYTHRESHOLD_REG_H

    if stability < threshold:
        # Select only stable models
        res["stable"] = True
    return res