Beispiel #1
0
 def createStatObj(self, results=None, exp_pred=None, responseType=None, nExtFolds=None):
     #Initialize res (statObj) for statistic results
     res = {}
     # Classification
     res["CA"] = None
     res["CM"] = None
     res["MCC"] = None
     #Regression
     res["R2"] = None
     res["RMSE"] = None
     #Both
     res["StabilityValue"] = None
     res["foldStat"] = {
             #Regression
             "R2"   : None,
             "RMSE" : None,
             #Classification
             "CM"   : None,
             "CA"   : None,
             "MCC"  : None }
     if results is None or exp_pred is None or responseType is None or nExtFolds is None:
         return res 
     #Calculate the (R2, RMSE) or (CM, CA) results depending on Classification or regression
     if responseType == "Classification":
         #Compute CA
         res["CA"] = sum(r[0] for r in results) / self.nExtFolds
         #Compute CM
         res["CM"] = results[0][1]                      # Get the first ConfMat
         for r in results[1:]:
             for Lidx,line in enumerate(r[1]):
                 for idx,val in enumerate(line):
                     res["CM"][Lidx][idx] = res["CM"][Lidx][idx] + val   #Add each same ConfMat position
         #Compute MCC 
         res["MCC"] = evalUtilities.calcMCC(res["CM"])
         #Compute foldStat
         res["foldStat"]["CA"] = [r[0] for r in results]
         res["foldStat"]["CM"] = [r[1] for r in results]
         res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
         #Compute Stability
         res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
     else:
         #compute R2
         res["R2"] = evalUtilities.calcRsqrt(exp_pred)
         #compute RMSE
         res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
         #Compute foldStat
         res["foldStat"]["RMSE"] = [r[0] for r in results]
         res["foldStat"]["R2"] = [r[1] for r in results]
         #Compute Stability
         res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["R2"])
     return res
    def createStatObj(
        self,
        results=None,
        exp_pred=None,
        nTrainCmpds=None,
        nTestCmpds=None,
        responseType=None,
        nExtFolds=None,
        userAlert="",
        rocs=None,
    ):
        # Initialize res (statObj) for statistic results
        res = {}
        self.__log("Starting to create Stat Obj")
        # Classification
        res["CA"] = None
        res["CM"] = None
        res["MCC"] = None
        res["ROC"] = None
        # Regression
        res["Q2"] = None
        res["RMSE"] = None
        # Both
        res["StabilityValue"] = None
        res["userAlert"] = userAlert
        res["selected"] = False
        res["stable"] = False
        res["responseType"] = False
        res["foldStat"] = {
            "nTrainCmpds": None,
            "nTestCmpds": None,
            # Regression
            "Q2": None,
            "RMSE": None,
            # Classification
            "CM": None,
            "CA": None,
            "MCC": None,
            "ROC": None,
        }
        if (
            results is None
        ):  # or exp_pred is None or responseType is None or nExtFolds is None or nTestCmpds is None or nTrainCmpds is None:
            self.__log("    NONE...")
            return res
        res["responseType"] = responseType
        # Calculate the (Q2, RMSE) or (CM, CA) results depending on Classification or regression
        if responseType == "Classification":
            # Compute CA
            res["CA"] = sum(r[0] for r in results) / nExtFolds
            # Compute CM
            res["CM"] = copy.deepcopy(results[0][1])  # Get the first ConfMat
            for r in results[1:]:
                for Lidx, line in enumerate(r[1]):
                    for idx, val in enumerate(line):
                        res["CM"][Lidx][idx] = res["CM"][Lidx][idx] + val  # Add each same ConfMat position
            # Compute MCC
            res["MCC"] = evalUtilities.calcMCC(res["CM"])
            # Compute ROC
            res["ROC"] = sum(ro[0] for ro in rocs) / self.nExtFolds
            # Compute foldStat
            res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
            res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
            res["foldStat"]["CA"] = [r[0] for r in results]
            res["foldStat"]["CM"] = [r[1] for r in results]
            res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
            res["foldStat"]["ROC"] = [ro for ro in rocs]
            # Compute Stability
            res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
        else:
            # compute Q2
            res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
            # compute RMSE
            res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
            # Compute foldStat
            res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
            res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
            res["foldStat"]["RMSE"] = [r[0] for r in results]
            res["foldStat"]["Q2"] = [r[1] for r in results]
            # Compute Stability value
            res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["Q2"])
        # Evaluate stability of ML
        StabilityValue = res["StabilityValue"]
        if StabilityValue is not None:
            if responseType == "Classification":
                if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
            else:
                if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
            if StabilityValue < stableTH:  # Select only stable models
                res["stable"] = True

        return res
Beispiel #3
0
def createStatObj(results=None,
                  exp_pred=None,
                  nTrainCmpds=None,
                  nTestCmpds=None,
                  responseType=None,
                  nExtFolds=None,
                  userAlert="",
                  foldSelectedML=None):
    #Initialize res (statObj) for statistic results
    res = {}
    # Classification
    res["CA"] = None
    res["CM"] = None
    res["MCC"] = None
    #Regression
    res["Q2"] = None
    res["RMSE"] = None
    #Both
    res["StabilityValue"] = None
    res["userAlert"] = userAlert
    res["selected"] = False
    res["stable"] = False
    res["responseType"] = False
    res["foldStat"] = {
        "nTrainCmpds": None,
        "nTestCmpds": None,
        #Regression
        "Q2": None,
        "RMSE": None,
        #Classification
        "CM": None,
        "CA": None,
        "MCC": None
    }
    if not results or results is None or exp_pred is None or responseType is None or nExtFolds is None or nTestCmpds is None or nTrainCmpds is None:
        return res
    res["responseType"] = responseType
    #Calculate the (Q2, RMSE) or (CM, CA) results depending on Classification or regression
    if responseType == "Classification":
        #Compute CA
        res["CA"] = sum(r[0] for r in results) / nExtFolds
        #Compute CM
        res["CM"] = copy.deepcopy(results[0][1])  # Get the first ConfMat
        for r in results[1:]:
            for Lidx, line in enumerate(r[1]):
                for idx, val in enumerate(line):
                    res["CM"][Lidx][idx] = res["CM"][Lidx][
                        idx] + val  #Add each same ConfMat position
        #Compute MCC
        res["MCC"] = evalUtilities.calcMCC(res["CM"])
        #Compute foldStat
        res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
        res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
        res["foldStat"]["CA"] = [r[0] for r in results]
        res["foldStat"]["CM"] = [r[1] for r in results]
        res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
        #Compute Stability
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
    else:
        #compute Q2
        res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
        #compute RMSE
        res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
        #Compute foldStat
        res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
        res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
        res["foldStat"]["RMSE"] = [r[0] for r in results]
        res["foldStat"]["Q2"] = [r[1] for r in results]
        #Compute Stability value
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["Q2"])

    # Save selectedMLs if passed
    if foldSelectedML:
        res["foldStat"]["foldSelectedML"] = [ml for ml in foldSelectedML]

    #Evaluate stability of ML
    StabilityValue = res["StabilityValue"]
    if StabilityValue is not None:
        if responseType == "Classification":
            if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
        else:
            if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
        if StabilityValue < stableTH:  # Select only stable models
            res["stable"] = True

    return res