Exemple #1
0
 def createStatObj(self, results=None, exp_pred=None, responseType=None, nExtFolds=None):
     """Assemble the statistics dictionary (statObj) for a set of external folds.

     Parameters:
         results      -- one entry per fold. In the classification branch each
                         entry is read as (CA, confusion matrix); in the
                         regression branch as (RMSE, R2).
         exp_pred     -- passed unchanged to evalUtilities.calcRsqrt and
                         evalUtilities.calcRMSE (regression branch only).
         responseType -- "Classification" selects the classification branch;
                         any other value is handled as regression.
         nExtFolds    -- divisor used to average the per-fold CA values.

     Returns:
         A dict with the keys "CA", "CM", "MCC", "R2", "RMSE",
         "StabilityValue" and "foldStat". If any argument is None only the
         empty template (all statistics set to None) is returned.
     """
     # Function-scope import so this block does not rely on a module-level one
     import copy

     #Initialize res (statObj) for statistic results
     res = {
         # Classification
         "CA": None,
         "CM": None,
         "MCC": None,
         #Regression
         "R2": None,
         "RMSE": None,
         #Both
         "StabilityValue": None,
         "foldStat": {
             #Regression
             "R2": None,
             "RMSE": None,
             #Classification
             "CM": None,
             "CA": None,
             "MCC": None,
         },
     }
     if results is None or exp_pred is None or responseType is None or nExtFolds is None:
         return res

     foldStat = res["foldStat"]
     #Calculate the (R2, RMSE) or (CM, CA) results depending on Classification or regression
     if responseType == "Classification":
         #Compute CA: average over the folds, using the nExtFolds argument that
         #was validated above rather than an attribute of self
         res["CA"] = sum(r[0] for r in results) / nExtFolds
         #Compute CM: start from a COPY of the first fold's matrix so that the
         #element-wise sum below does not overwrite the caller's data (which
         #would also corrupt the per-fold CM/MCC entries computed further down)
         totalCM = copy.deepcopy(results[0][1])
         for r in results[1:]:
             for Lidx, line in enumerate(r[1]):
                 for idx, val in enumerate(line):
                     totalCM[Lidx][idx] = totalCM[Lidx][idx] + val   #Add each same ConfMat position
         res["CM"] = totalCM
         #Compute MCC
         res["MCC"] = evalUtilities.calcMCC(totalCM)
         #Compute foldStat
         foldStat["CA"] = [r[0] for r in results]
         foldStat["CM"] = [r[1] for r in results]
         foldStat["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
         #Compute Stability
         res["StabilityValue"] = evalUtilities.stability(foldStat["CA"])
     else:
         #compute R2
         res["R2"] = evalUtilities.calcRsqrt(exp_pred)
         #compute RMSE
         res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
         #Compute foldStat
         foldStat["RMSE"] = [r[0] for r in results]
         foldStat["R2"] = [r[1] for r in results]
         #Compute Stability
         res["StabilityValue"] = evalUtilities.stability(foldStat["R2"])
     return res
def predict(model, test, fps, fpsTest, label, thrs):

    # Predict test set using AD
    fid = open("predictions_"+label+".txt", "w")
    fid.write("Pred\tProb\tActual\tCorrect\n")
    fid.close()
    CM = [[0, 0], [0, 0]]
    outAD = 0
    for idx in range(len(test)):
        predList = model(test[idx], returnDFV = True)
        pred = predList[0].value
        prob = predList[1]
        actual = test[idx]["BioActivity"].value
        if pred == actual:
            correct = True
        else:
            correct = False
    
        # Calculate the median of the topological distance to the 10 NN in the train set
        dist = getDist(fpsTest[idx], fps)
    
        if dist > thrs:
            CM = getCM(pred, actual, CM)
            print "pred, prob, actual, correct  ", pred, prob, actual, correct
            fid = open("predictions_"+label+".txt", "a")
            fid.write(pred+"\t"+str(prob)+"\t"+actual+"\t"+str(correct)+"\n")
            fid.close()
        else:
            outAD = outAD + 1
    
    print CM
    MCC = round(evalUtilities.calcMCC(CM),3)
    print "MCC of test set ", MCC
    print "Fraction of outAD in test set ", float(outAD)/len(test)
    return MCC, float(outAD)/len(test)
Exemple #3
0
def printTestSetAcc(Model, test, learners, resultsFid, projectName, isRand):

    TL = 0
    TH = 0
    FL = 0
    FH = 0
    for ex in test:
        pred = Model(ex)
        actual = ex.get_class()
        #print pred, actual
        if actual == "Low":
            if pred == "Low":
                TL = TL + 1
            elif pred == "High":
                FH = FH + 1
        elif actual == "High":
            if pred == "High":
                TH = TH + 1
            elif pred == "Low":
                FL = FL + 1
    print "TH, TL, FH, FL"
    print TH, TL, FH, FL
    CM = [[TH, FL], [FH, TL]]
    CA = round(float(TH+TL)/(TH+TL+FH+FL),3)
    print CA
    MCC = round(evalUtilities.calcMCC(CM), 3)
    print "MCC ", MCC
    if isRand:
        resultsFid.write("NO_"+projectName+"_randTest\t"+str(TH)+"\t"+str(TL)+"\t"+str(FH)+"\t"+str(FL)+"\t"+str(CA)+"\t"+str(MCC)+"\n" )
    else:
        resultsFid.write(projectName+"_test\t"+str(TH)+"\t"+str(TL)+"\t"+str(FH)+"\t"+str(FL)+"\t"+str(CA)+"\t"+str(MCC)+"\n" )
    
    return round(MCC,3)
    def createStatObj(
        self,
        results=None,
        exp_pred=None,
        nTrainCmpds=None,
        nTestCmpds=None,
        responseType=None,
        nExtFolds=None,
        userAlert="",
        rocs=None,
    ):
        """Assemble the statistics dictionary (statObj) for a set of external folds.

        Parameters:
            results      -- one entry per fold. In the classification branch
                            each entry is read as (CA, confusion matrix); in
                            the regression branch as (RMSE, Q2).
            exp_pred     -- passed unchanged to evalUtilities.calcRsqrt and
                            evalUtilities.calcRMSE (regression branch only).
            nTrainCmpds  -- per-fold values copied to foldStat["nTrainCmpds"].
            nTestCmpds   -- per-fold values copied to foldStat["nTestCmpds"];
                            their mean selects the stability threshold.
            responseType -- "Classification" selects the classification
                            branch; any other value is handled as regression.
            nExtFolds    -- divisor used to average the per-fold CA and ROC.
            userAlert    -- stored unchanged under "userAlert".
            rocs         -- per-fold ROC entries (classification only);
                            element [0] of each entry is averaged. When None,
                            the ROC fields are left as None.

        Returns:
            The statObj dict. Only `results` is checked for None; when it is
            missing the empty template is returned.
        """
        self.__log("Starting to create Stat Obj")
        # Initialize res (statObj) for statistic results
        res = {
            # Classification
            "CA": None,
            "CM": None,
            "MCC": None,
            "ROC": None,
            # Regression
            "Q2": None,
            "RMSE": None,
            # Both
            "StabilityValue": None,
            "userAlert": userAlert,
            "selected": False,
            "stable": False,
            "responseType": False,
            "foldStat": {
                "nTrainCmpds": None,
                "nTestCmpds": None,
                # Regression
                "Q2": None,
                "RMSE": None,
                # Classification
                "CM": None,
                "CA": None,
                "MCC": None,
                "ROC": None,
            },
        }
        # Only `results` is validated here; the other arguments are used as given.
        if results is None:
            self.__log("    NONE...")
            return res
        res["responseType"] = responseType
        foldStat = res["foldStat"]
        # Calculate the (Q2, RMSE) or (CM, CA) results depending on Classification or regression
        if responseType == "Classification":
            # Compute CA
            res["CA"] = sum(r[0] for r in results) / nExtFolds
            # Compute CM on a copy so the first fold's matrix is not modified
            totalCM = copy.deepcopy(results[0][1])  # Get the first ConfMat
            for r in results[1:]:
                for Lidx, line in enumerate(r[1]):
                    for idx, val in enumerate(line):
                        totalCM[Lidx][idx] = totalCM[Lidx][idx] + val  # Add each same ConfMat position
            res["CM"] = totalCM
            # Compute MCC
            res["MCC"] = evalUtilities.calcMCC(totalCM)
            # Compute ROC: averaged with the same nExtFolds argument as CA
            # (not self.nExtFolds), and skipped when no rocs were supplied
            # instead of failing while iterating None.
            if rocs is not None:
                res["ROC"] = sum(ro[0] for ro in rocs) / nExtFolds
            # Compute foldStat
            foldStat["nTrainCmpds"] = list(nTrainCmpds)
            foldStat["nTestCmpds"] = list(nTestCmpds)
            foldStat["CA"] = [r[0] for r in results]
            foldStat["CM"] = [r[1] for r in results]
            foldStat["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
            if rocs is not None:
                foldStat["ROC"] = list(rocs)
            # Compute Stability
            res["StabilityValue"] = evalUtilities.stability(foldStat["CA"])
        else:
            # compute Q2
            res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
            # compute RMSE
            res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
            # Compute foldStat
            foldStat["nTrainCmpds"] = list(nTrainCmpds)
            foldStat["nTestCmpds"] = list(nTestCmpds)
            foldStat["RMSE"] = [r[0] for r in results]
            foldStat["Q2"] = [r[1] for r in results]
            # Compute Stability value
            res["StabilityValue"] = evalUtilities.stability(foldStat["Q2"])
        # Evaluate stability of ML
        StabilityValue = res["StabilityValue"]
        if StabilityValue is not None:
            # The threshold depends on the response type and on whether the
            # mean number of test compounds per fold exceeds 50.
            largeTestSets = statc.mean(foldStat["nTestCmpds"]) > 50
            if responseType == "Classification":
                if largeTestSets:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
            else:
                if largeTestSets:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
            if StabilityValue < stableTH:  # Select only stable models
                res["stable"] = True

        return res
Exemple #5
0
def createStatObj(results=None,
                  exp_pred=None,
                  nTrainCmpds=None,
                  nTestCmpds=None,
                  responseType=None,
                  nExtFolds=None,
                  userAlert="",
                  foldSelectedML=None):
    """Assemble the statistics dictionary (statObj) for a set of external folds.

    Parameters:
        results        -- one entry per fold. In the classification branch
                          each entry is read as (CA, confusion matrix); in the
                          regression branch as (RMSE, Q2).
        exp_pred       -- passed unchanged to evalUtilities.calcRsqrt and
                          evalUtilities.calcRMSE (regression branch only).
        nTrainCmpds    -- per-fold values copied to foldStat["nTrainCmpds"].
        nTestCmpds     -- per-fold values copied to foldStat["nTestCmpds"];
                          their mean selects the stability threshold.
        responseType   -- "Classification" selects the classification branch;
                          any other value is handled as regression.
        nExtFolds      -- divisor used to average the per-fold CA values.
        userAlert      -- stored unchanged under "userAlert".
        foldSelectedML -- optional; when truthy, copied to
                          foldStat["foldSelectedML"].

    Returns:
        The statObj dict. The empty template is returned when results is
        empty/None or any of exp_pred, responseType, nExtFolds, nTestCmpds or
        nTrainCmpds is None.
    """
    # Template holding every statistic, all unset
    statObj = {
        # Classification
        "CA": None,
        "CM": None,
        "MCC": None,
        # Regression
        "Q2": None,
        "RMSE": None,
        # Both
        "StabilityValue": None,
        "userAlert": userAlert,
        "selected": False,
        "stable": False,
        "responseType": False,
        "foldStat": {
            "nTrainCmpds": None,
            "nTestCmpds": None,
            # Regression
            "Q2": None,
            "RMSE": None,
            # Classification
            "CM": None,
            "CA": None,
            "MCC": None,
        },
    }
    perFold = statObj["foldStat"]

    required = (exp_pred, responseType, nExtFolds, nTestCmpds, nTrainCmpds)
    if not results or any(arg is None for arg in required):
        return statObj

    statObj["responseType"] = responseType
    isClassification = responseType == "Classification"

    if isClassification:
        # Mean classification accuracy over the folds
        statObj["CA"] = sum(fold[0] for fold in results) / nExtFolds
        # Element-wise sum of the fold confusion matrices, accumulated on a
        # copy of the first one
        totalCM = copy.deepcopy(results[0][1])
        for fold in results[1:]:
            for rowIdx, row in enumerate(fold[1]):
                for colIdx, count in enumerate(row):
                    totalCM[rowIdx][colIdx] = totalCM[rowIdx][colIdx] + count
        statObj["CM"] = totalCM
        statObj["MCC"] = evalUtilities.calcMCC(totalCM)
        # Per-fold figures
        perFold["nTrainCmpds"] = list(nTrainCmpds)
        perFold["nTestCmpds"] = list(nTestCmpds)
        perFold["CA"] = [fold[0] for fold in results]
        perFold["CM"] = [fold[1] for fold in results]
        perFold["MCC"] = [evalUtilities.calcMCC(fold[1]) for fold in results]
        # Stability is derived from the per-fold CA
        statObj["StabilityValue"] = evalUtilities.stability(perFold["CA"])
    else:
        statObj["Q2"] = evalUtilities.calcRsqrt(exp_pred)
        statObj["RMSE"] = evalUtilities.calcRMSE(exp_pred)
        # Per-fold figures
        perFold["nTrainCmpds"] = list(nTrainCmpds)
        perFold["nTestCmpds"] = list(nTestCmpds)
        perFold["RMSE"] = [fold[0] for fold in results]
        perFold["Q2"] = [fold[1] for fold in results]
        # Stability is derived from the per-fold Q2
        statObj["StabilityValue"] = evalUtilities.stability(perFold["Q2"])

    # Save selectedMLs if passed
    if foldSelectedML:
        perFold["foldSelectedML"] = list(foldSelectedML)

    # Evaluate stability of ML
    stabilityValue = statObj["StabilityValue"]
    if stabilityValue is None:
        return statObj

    # Threshold depends on the response type and on whether the mean number
    # of test compounds per fold exceeds 50
    largeTestSets = statc.mean(perFold["nTestCmpds"]) > 50
    if isClassification:
        if largeTestSets:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
        else:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
    else:
        if largeTestSets:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_REG_L
        else:
            threshold = AZOC.QSARSTABILITYTHRESHOLD_REG_H
    if stabilityValue < threshold:  # Select only stable models
        statObj["stable"] = True

    return statObj
data = dataUtilities.attributeDeselectionData(data, descList)
print "Length domain ", len(data.domain)

learner = AZorngCvSVM.CvSVMLearner(C=32, gamma=0.03125)
#learner = AZorngRF.RFLearner()
#learner = AZorngRF.RFLearner(stratify = "Yes") # No effect
#learner = AZorngCvBoost.CvBoostLearner()
#learner.stratify = "Yes" # No effect
#learner.priors = {"Active":0.80, "Inactive":0.20}

# Test set accuracy
model = learner(data)
res = orngTest.testOnData([model], data)
CM = evalUtilities.ConfMat(res)[0]
CA = round(orngStat.CA(res)[0], 3)
MCC = round(evalUtilities.calcMCC(CM), 3)
# TH, FL, FH, TL
resList = [
    str(CM[0][0]),
    str(CM[0][1]),
    str(CM[1][0]),
    str(CM[1][1]),
    str(CA),
    str(MCC)
]
wrtStr = string.join(resList, "\t")
print "nonIID test set results"
print wrtStr

# CV accuracy
res = orngTest.crossValidation(