Example #1
def selectModel(MLStatistics, logFile=None):
    """Return the model with highest Q2/CA amongst methods with a stability less than 0.1.
           If no methods is considered stable, select the method with the greatest Q2/CA
        """
    log(logFile, "Selecting MLmethod...")
    bestModelName = None
    bestRes = None
    bestStableVal = None
    #Select only from stable models
    for modelName in MLStatistics:
        StabilityValue = MLStatistics[modelName]["StabilityValue"]
        if StabilityValue is not None:
            if MLStatistics[modelName]["responseType"] == "Classification":
                if statc.mean(MLStatistics[modelName]["foldStat"]
                              ["nTestCmpds"]) > 50:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
            elif MLStatistics[modelName]["responseType"] == "Regression":
                if statc.mean(MLStatistics[modelName]["foldStat"]
                              ["nTestCmpds"]) > 50:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
            if StabilityValue < stableTH:
                valRes = max(MLStatistics[modelName]["Q2"],
                             MLStatistics[modelName]
                             ["CA"])  # One of them is always None
                if bestRes is None or valRes > bestRes:
                    bestRes = valRes
                    bestModelName = modelName
                    bestStableVal = StabilityValue
                elif valRes == bestRes and StabilityValue < bestStableVal:
                    bestRes = valRes
                    bestModelName = modelName
                    bestStableVal = StabilityValue

    # No stable models found! Selecting the one with best result still...
    if bestModelName is None:
        log(
            logFile,
            "  No stable models found! Selecting the one with best result still..."
        )
        for modelName in MLStatistics:
            valRes = max(
                MLStatistics[modelName]["Q2"],
                MLStatistics[modelName]["CA"])  # One of them is always None
            if bestRes is None or valRes > bestRes:
                bestRes = valRes
                bestModelName = modelName
        log(logFile, "  Selected the non-stable MLmethod: " + bestModelName)
    else:
        log(logFile, "  Selected the stable MLmethod: " + bestModelName)
    MLMethod = copy.deepcopy(MLStatistics[bestModelName])
    MLMethod["MLMethod"] = bestModelName
    MLStatistics[bestModelName]["selected"] = True
    return MLMethod
Example #2
def selectModel(MLStatistics, logFile=None):
        """Return the model with the highest Q2/CA amongst the methods with a stability value below the applicable threshold.
           If no method is considered stable, select the method with the greatest Q2/CA.
        """
        log(logFile, "Selecting MLmethod...")
        bestModelName = None
        bestRes = None
        bestStableVal = None
        #Select only from stable models
        for modelName in MLStatistics:
            StabilityValue = MLStatistics[modelName]["StabilityValue"]
            if StabilityValue is not None:
                if MLStatistics[modelName]["responseType"] == "Classification": 
                    if statc.mean(MLStatistics[modelName]["foldStat"]["nTestCmpds"]) > 50:
                        stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
                    else:
                        stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
                elif MLStatistics[modelName]["responseType"] == "Regression":
                    if statc.mean(MLStatistics[modelName]["foldStat"]["nTestCmpds"]) > 50:
                        stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
                    else:
                        stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
                if StabilityValue < stableTH:
                    valRes = max( MLStatistics[modelName]["Q2"], MLStatistics[modelName]["CA"])  # One of them is always None
                    if bestRes is None or valRes > bestRes:
                        bestRes = valRes
                        bestModelName = modelName
                        bestStableVal = StabilityValue
                    elif valRes == bestRes and StabilityValue < bestStableVal:
                        bestRes = valRes
                        bestModelName = modelName
                        bestStableVal = StabilityValue
                    

        # No stable models found! Selecting the one with best result still... 
        if bestModelName is None:
            log(logFile, "  No stable models found! Selecting the one with best result still...")
            for modelName in MLStatistics:
                valRes = max( MLStatistics[modelName]["Q2"], MLStatistics[modelName]["CA"])  # One of them is always None
                if bestRes is None or valRes > bestRes:
                    bestRes = valRes
                    bestModelName = modelName
            log(logFile, "  Selected the non-stable MLmethod: " + bestModelName)
        else:
            log(logFile, "  Selected the stable MLmethod: " + bestModelName)
        MLMethod = MLStatistics[bestModelName].copy()
        MLMethod["MLMethod"] = bestModelName
        MLStatistics[bestModelName]["selected"] = True
        return MLMethod
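A minimal sketch of the input selectModel expects; the method names and values below are hypothetical, and the real dict is produced by createStatObj (see Examples #20 and #29):

# Hypothetical MLStatistics input (illustrative values only):
MLStatistics = {
    "RF":  {"responseType": "Regression", "StabilityValue": 0.020,
            "Q2": 0.71, "CA": None,
            "foldStat": {"nTestCmpds": [60, 60, 60, 60, 60]}},
    "SVM": {"responseType": "Regression", "StabilityValue": 0.015,
            "Q2": 0.68, "CA": None,
            "foldStat": {"nTestCmpds": [60, 60, 60, 60, 60]}},
}
# Both entries are stable for any threshold above 0.02, so
# selectModel(MLStatistics) would pick "RF", the highest Q2.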
Example #3
def calcRsqrt(exp_pred_Val):
    """Calculates the Rsqrt of the predicted values in exp_pred_Val[1] against the 
        respective experimental values in exp_pred_Val[0]         
        Input example:

        [ (ExperimentalValue1, PredictedValue1),        # In respect to 1st Ex
          (ExperimentalValue2, PredictedValue2),        # In respect to 2nd Ex
          (ExperimentalValue3, PredictedValue3),        # In respect to 3rd Ex
          (ExperimentalValue4, PredictedValue4),        # In respect to 4th Ex
          ...                                           # ...
        ]
    """
    # Calc mean of the experimental response variable
    actualValuesList = []
    for val in exp_pred_Val:
        actualValuesList.append(val[0])
    testMean = statc.mean(actualValuesList)

    errSum = 0.0
    meanSum = 0.0
    for val in exp_pred_Val:
        errSum = errSum + math.pow(val[0] - string.atof(str(val[1])), 2)
        meanSum = meanSum + math.pow(testMean - val[0], 2)
    if not meanSum:
        Rsqrt = -999999
    else:
        Rsqrt = 1 - errSum / meanSum
    return Rsqrt
Example #4
def calcRsqrt(exp_pred_Val):
    """Calculates the Rsqrt of the predicted values in exp_pred_Val[1] against the 
        respective experimental values in exp_pred_Val[0]         
        Input example:

        [ (ExperimentalValue1, PredictedValue1),        # In respect to 1st Ex
          (ExperimentalValue2, PredictedValue2),        # In respect to 2nd Ex
          (ExperimentalValue3, PredictedValue3),        # In respect to 3rd Ex
          (ExperimentalValue4, PredictedValue4),        # In respect to 4th Ex
          ...                                           # ...
        ]
    """
    # Calc mean of the experimental response variable
    actualValuesList = []
    for val in exp_pred_Val:
        actualValuesList.append(val[0])
    testMean = statc.mean(actualValuesList)

    errSum = 0.0
    meanSum = 0.0
    for val in exp_pred_Val:
        errSum = errSum + math.pow(val[0] - string.atof(str(val[1])),2)
        meanSum = meanSum + math.pow(testMean - val[0],2)
    if not meanSum:
        Rsqrt = -999999
    else:
        Rsqrt = 1 - errSum/meanSum
    return Rsqrt
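A worked toy call of calcRsqrt above (values chosen for easy arithmetic):

# exp_pred_Val = [(1.0, 1.1), (2.0, 1.9), (3.0, 3.2)]
# testMean = mean([1.0, 2.0, 3.0]) = 2.0
# errSum   = 0.01 + 0.01 + 0.04   = 0.06
# meanSum  = 1.0  + 0.0  + 1.0    = 2.0
# calcRsqrt(...) = 1 - 0.06/2.0   = 0.97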
Example #5
    def _get_par(self, datao):
        gaussiane = [ estimate_gaussian_per_class(datao, at, common_if_extreme=True) for at in range(len(datao.domain.attributes)) ]
        normalizec = []
        for i,g in zip(range(len(datao.domain.attributes)), gaussiane):
            r = [ _llrlogratio(ex[i].value, *g) for ex in datao ]
            normalizec.append((mean(r), std(r)))
        return gaussiane, normalizec
Example #6
def getRMSEstd(res, nFolds):
    """
    Calculate the standard deviation of the RMSE across the nFolds folds of a cross-validation (returned together with the per-fold RMSE list).
    res is the object containing the results from orngTest methods such as crossValidation.
    """

    # Initialize a list to contain lists of errors for each fold.
    errorList = []
    for idx in range(nFolds):
        errorList.append([])

    # ex contains info on the fold number, prediction and actual response for each example used in the CV
    # Append each example's squared error to the correct fold list
    for ex in res.results:
        error = (ex.classes[0] - ex.actualClass)**2
        errorList[ex.iterationNumber].append(error)

    # RMSE of the different folds
    RMSElist = []
    for idx in range(nFolds):
        average = sum(errorList[idx]) / len(errorList[idx])
        RMSElist.append(math.sqrt(average))
    RMSEstd = stats.stdev(RMSElist)
    RMSEmean = statc.mean(RMSElist)
    if verbose > 0:
        print str(RMSEmean) + "\t" + str(RMSEstd) + "\t" + string.join(
            [str(x) for x in RMSElist], "\t")
    return RMSEstd, RMSElist
Example #7
def getRMSEstd(res, nFolds):
    """
    Calculate the standard deviation of the RMSE across the nFolds folds of a cross-validation (returned together with the per-fold RMSE list).
    res is the object containing the results from orngTest methods such as crossValidation.
    """

    # Initialize a list to contain lists of errors for each fold.
    errorList = []
    for idx in range(nFolds):
        errorList.append([])

    # ex contains info on the fold number, prediction and actual response for each example used in the CV
    # Append each example's squared error to the correct fold list
    for ex in res.results:
        error = (ex.classes[0] - ex.actualClass)**2
        errorList[ex.iterationNumber].append(error)

    # RMSE of the different folds
    RMSElist = []
    for idx in range(nFolds):
        average = sum(errorList[idx])/len(errorList[idx])
        RMSElist.append(math.sqrt(average))
    RMSEstd = stats.stdev(RMSElist)
    RMSEmean = statc.mean(RMSElist)
    if verbose > 0: print str(RMSEmean)+"\t"+str(RMSEstd)+"\t"+string.join( [str(x) for x in RMSElist], "\t")
    return RMSEstd, RMSElist
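The same per-fold computation without the Orange result object; a self-contained sketch with made-up squared errors, using the sample standard deviation (the stats.stdev used above may be defined slightly differently):

import math

fold_sq_errors = [[0.04, 0.09], [0.01, 0.16]]  # squared errors, one list per fold
RMSElist = [math.sqrt(sum(e) / len(e)) for e in fold_sq_errors]
RMSEmean = sum(RMSElist) / len(RMSElist)
RMSEstd = math.sqrt(sum((x - RMSEmean) ** 2 for x in RMSElist)
                    / (len(RMSElist) - 1))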
Example #8
def Rsqrt_obsolete(res = None):
    """
    Calculates the R-squared (Coefficient of determination) of orngTest.ExperimentResults in res
    The results res must be from a learner
    """
    # If Called without arguments, return the type of problems this method can be used for: 
    # 1 - Classification problems (Discrete Class)
    # 2 - Regression problems (Continuous Class)
    # 3 - Both Regression and Classification problems (Continuous or Discrete Class)
    if res == None:
        return {"type":REGRESSION}

    if res.numberOfIterations > 1:
        Rs = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)]
        errSum = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)]
        meanSum = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)]
        means = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)]
        nIter = [0]*res.numberOfIterations
        for tex in res.results:
            ac = float(tex.actualClass)
            nIter[tex.iterationNumber] += 1
            for i, cls in enumerate(tex.classes):
                means[i][tex.iterationNumber] += ac
        for nit, it in enumerate(nIter):
            for i in range(res.numberOfLearners):
                means[i][nit] /= it

        for tex in res.results:
            ac = float(tex.actualClass)
            for i, cls in enumerate(tex.classes):
                errSum[i][tex.iterationNumber] += (float(cls) - ac)**2
                meanSum[i][tex.iterationNumber] += (means[i][tex.iterationNumber] - ac)**2
        for learner in range(res.numberOfLearners):
            for it in range(len(nIter)):
                if meanSum[learner][it]==0:
                    return "N/A"
                Rs[learner][it] = 1-(errSum[learner][it] / meanSum[learner][it])
        return [statc.mean(x) for x in Rs]

    else:
        RsqrtList=[]
        for nLearner in range(len(res.results[0].classes)):
            # Calc average of the response variable
            testMean = 0
            for ex in res.results:
                testMean = testMean + ex.actualClass
            testMean = testMean/len(res.results)
            errSum = 0.0
            meanSum = 0.0
            for ex in res.results:
                errSum = errSum + math.pow(ex.actualClass - ex.classes[nLearner],2)
                meanSum = meanSum + math.pow(testMean - ex.actualClass,2)
            if meanSum==0:
                return "N/A"
            RsqrtList.append(1 - errSum/meanSum)
        return RsqrtList
Example #9
    def _get_par(self, datao):
        gaussiane = [
            estimate_gaussian_per_class(datao, at, common_if_extreme=True)
            for at in range(len(datao.domain.attributes))
        ]
        normalizec = []
        for i, g in zip(range(len(datao.domain.attributes)), gaussiane):
            r = [_llrlogratio(ex[i].value, *g) for ex in datao]
            normalizec.append((mean(r), std(r)))
        return gaussiane, normalizec
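What the two return values are used for, based on build_feature in Examples #15 and #19: gaussiane holds the per-attribute Gaussian parameters, and normalizec holds the (mean, std) pairs later used to z-score each attribute's log-likelihood ratio. A sketch of that normalization step:

def normalize_llr(llr, m, s):
    # z-score one log-ratio; attributes without spread are disregarded,
    # mirroring the s == 0 branch in build_feature
    return 0.0 if s == 0 else (llr - m) / s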
Example #10
def RMSE_obsolete(res=None):
    """
    Calculates the Root Mean Squared Error of orngTest.ExperimentResults in res
    The results res must be from a regressor
    """
    # If Called without arguments, return the type of problems this method can be used for:
    # 1 - Classification problems (Discrete Class)
    # 2 - Regression problems (Continuous Class)
    # 3 - Both Regression and Classification problems (Continuous or Discrete Class)
    if res == None:
        return {"type": REGRESSION}

    if res.numberOfIterations > 1:
        MSEs = [[0.0] * res.numberOfIterations
                for i in range(res.numberOfLearners)]
        nIter = [0] * res.numberOfIterations
        for tex in res.results:
            ac = float(tex.actualClass)
            nIter[tex.iterationNumber] += 1
            for i, cls in enumerate(tex.classes):
                MSEs[i][tex.iterationNumber] += (float(cls) - ac)**2
        MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs]
        MSEs = [[math.sqrt(x) for x in y] for y in MSEs]

        # Print output from each fold to temp file
        RMSEfoldList = MSEs
        RMSE = [statc.mean(x) for x in RMSEfoldList]
        RMSEstd = stats.stdev(RMSEfoldList[0])
        #print str(RMSE[0])+"\t"+str(RMSEstd)+"\t"+string.join( [str(x) for x in RMSEfoldList[0]] , "\t")

        return [round(statc.mean(x), 2) for x in MSEs]

    else:
        MSEs = [0.0] * res.numberOfLearners
        for tex in res.results:
            MSEs = map(lambda res, cls, ac=float(tex.actualClass): res +
                       (float(cls) - ac)**2,
                       MSEs,
                       tex.classes)

        MSEs = [x / (len(res.results)) for x in MSEs]
        return [round(math.sqrt(x), 2) for x in MSEs]
Example #11
def CA_obsolete(res=None, returnFoldStat=False):
    """
    Calculates the classification Accuracy of orngTest.ExperimentResults in res
    The results res must be from a classifier
    """
    # If Called without arguments, return the type of problems this method can be used for:
    # 1 - Classification problems (Discrete Class)
    # 2 - Regression problems (Continuous Class)
    # 3 - Both Regression and Classification problems (Continuous or Discrete Class)
    if res == None:
        return {"type": CLASSIFICATION}

    if res.numberOfIterations > 1:
        CAs = [[0.0] * res.numberOfIterations
               for i in range(res.numberOfLearners)]
        nIter = [0] * res.numberOfIterations
        for tex in res.results:
            ac = tex.actualClass
            nIter[tex.iterationNumber] += 1
            for i, cls in enumerate(tex.classes):
                if cls == ac:
                    CAs[i][tex.iterationNumber] += 1
        CAs = [[x / ni for x, ni in zip(y, nIter)] for y in CAs]

        CAfoldList = CAs
        CA = [statc.mean(x) for x in CAs]
        CAstd = stats.stdev(CAfoldList[0])

        if returnFoldStat:
            return [round(statc.mean(x), 3) for x in CAs], CAfoldList
        else:
            return [round(statc.mean(x), 3) for x in CAs]

    else:
        CAs = [0.0] * res.numberOfLearners
        for tex in res.results:
            CAs = map(lambda res, cls, ac=tex.actualClass: res + types.IntType(
                cls == ac),
                      CAs,
                      tex.classes)
        return [round(x / (len(res.results)), 3) for x in CAs]
Example #12
def estimate_gaussian_per_class(data,
                                i,
                                a=None,
                                b=None,
                                common_if_extreme=False):
    cv = data.domain.class_var

    if a == None: a = cv.values[0]
    if b == None: b = cv.values[1]

    def avWCVal(value):
        return [
            ex[i].value for ex in data
            if ex[-1].value == value and not ex[i].isSpecial()
        ]

    list1 = avWCVal(a)
    list2 = avWCVal(b)

    mi1 = mi2 = st1 = st2 = None

    # Either class list may be empty; in that case the mean/std for that
    # class stay None
    try:
        mi1 = statc.mean(list1)
        st1 = statc.std(list1)
    except:
        pass

    try:
        mi2 = statc.mean(list2)
        st2 = statc.std(list2)
    except:
        pass

    def extreme():
        return st1 == 0 or st2 == 0

    if common_if_extreme and extreme():
        st1 = st2 = statc.std(list1 + list2)

    return mi1, st1, mi2, st2
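_llrlogratio itself is not shown in these excerpts; a plausible form, assuming it is the log-ratio of the two class-conditional Gaussian densities whose parameters are returned above:

import math

def llr_logratio(v, mi1, st1, mi2, st2):
    # Hypothetical sketch: log N(v; mi1, st1) - log N(v; mi2, st2)
    def logpdf(x, m, s):
        return -math.log(s * math.sqrt(2 * math.pi)) - (x - m) ** 2 / (2 * s ** 2)
    return logpdf(v, mi1, st1) - logpdf(v, mi2, st2)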
Example #13
def RMSE_obsolete(res = None):
    """
    Calculates the Root Mean Squared Error of orngTest.ExperimentResults in res
    The results res must be from a regressor
    """
    # If Called without arguments, return the type of problems this method can be used for: 
    # 1 - Classification problems (Discrete Class)
    # 2 - Regression problems (Continuous Class)
    # 3 - Both Regression and Classification problems (Continuous or Discrete Class)
    if res == None:
        return {"type":REGRESSION}

    if res.numberOfIterations > 1:
        MSEs = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)]
        nIter = [0]*res.numberOfIterations
        for tex in res.results:
            ac = float(tex.actualClass)
            nIter[tex.iterationNumber] += 1
            for i, cls in enumerate(tex.classes):
                MSEs[i][tex.iterationNumber] += (float(cls) - ac)**2
        MSEs = [[x/ni for x, ni in zip(y, nIter)] for y in MSEs]
        MSEs = [[math.sqrt(x) for x in y] for y in MSEs]

        # Print output from each fold to temp file
        RMSEfoldList = MSEs
        RMSE = [statc.mean(x) for x in RMSEfoldList]
        RMSEstd = stats.stdev(RMSEfoldList[0])
        #print str(RMSE[0])+"\t"+str(RMSEstd)+"\t"+string.join( [str(x) for x in RMSEfoldList[0]] , "\t")

        return [round(statc.mean(x),2) for x in MSEs]

    else:
        MSEs = [0.0]*res.numberOfLearners
        for tex in res.results:
            MSEs = map(lambda res, cls, ac = float(tex.actualClass):
                       res + (float(cls) - ac)**2, MSEs, tex.classes)

        MSEs = [x/(len(res.results)) for x in MSEs]
        return [round(math.sqrt(x),2)  for x in MSEs]
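A toy single-iteration run of the else branch above (hypothetical predictions):

import math
preds, actuals = [1.1, 1.9, 3.2], [1.0, 2.0, 3.0]
mse = sum((p - a) ** 2 for p, a in zip(preds, actuals)) / len(actuals)
print round(math.sqrt(mse), 2)  # 0.14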
Example #14
def CA_obsolete(res = None, returnFoldStat = False):
    """
    Calculates the classification Accuracy of orngTest.ExperimentResults in res
    The results res must be from a classifier
    """
    # If Called without arguments, return the type of problems this method can be used for: 
    # 1 - Classification problems (Discrete Class)
    # 2 - Regression problems (Continuous Class)
    # 3 - Both Regression and Classification problems (Continuous or Discrete Class)
    if res == None:
        return {"type":CLASSIFICATION}

    if res.numberOfIterations > 1:
        CAs = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)]
        nIter = [0]*res.numberOfIterations
        for tex in res.results:
            ac = tex.actualClass
            nIter[tex.iterationNumber] += 1
            for i, cls in enumerate(tex.classes):
                if cls == ac:
                    CAs[i][tex.iterationNumber] += 1
        CAs = [[x/ni for x, ni in zip(y, nIter)] for y in CAs]

        CAfoldList = CAs
        CA = [statc.mean(x) for x in CAs]
        CAstd = stats.stdev(CAfoldList[0])

        if returnFoldStat:
            return [round(statc.mean(x),3) for x in CAs], CAfoldList
        else:
            return [round(statc.mean(x),3) for x in CAs]

    else:
        CAs = [0.0]*res.numberOfLearners
        for tex in res.results:
            CAs = map(lambda res, cls, ac = tex.actualClass:
                       res + types.IntType(cls == ac), CAs, tex.classes)
        return [round(x/(len(res.results)),3) for x in CAs]
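A toy illustration of the per-fold aggregation above (hypothetical fold results):

# fold 0: 3 of 4 correct, fold 1: 2 of 4 correct
CAs = [3 / 4.0, 2 / 4.0]
CA = round(sum(CAs) / len(CAs), 3)  # 0.625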
Example #15
    def build_feature(self, data, gs):

        at = Orange.feature.Continuous(name=str(gs))
        geneset = list(gs.genes)

        nm, name_ind, genes, takegenes, to_geneset = self._match_data(
            data, geneset, odic=True)

        gsi = [name_ind[g] for g in genes]
        gausse = compute_llr(data, gsi, self._gauss_cache)
        genes_gs = [to_geneset[g] for g in genes]

        if self.normalize:  # per (3) in the paper
            #compute log ratios for all samples and genes from this gene set
            for i, gene_gs, g in zip(gsi, genes_gs, gausse):
                if gene_gs not in self._normalizec:  #skip if computed already
                    r = [_llrlogratio(ex[i].value, *g) for ex in data]
                    self._normalizec[gene_gs] = (mean(r), std(r))

        def t(ex,
              w,
              genes_gs=genes_gs,
              gausse=gausse,
              normalizec=self._normalizec):
            nm2, name_ind2, genes2 = self._match_instance(ex, genes_gs, None)
            gsvalues = [vou(ex, gn, name_ind2) for gn in genes2]

            vals = [
                _llrlogratio(v, *g) if v != "?" else 0.0
                for v, g in zip(gsvalues, gausse)
            ]

            if len(normalizec):  #normalize according to (3)
                vals2 = []
                for v, g in zip(vals, genes_gs):
                    m, s = normalizec[g]
                    if s == 0:  #disregard attributes without differences
                        vals2.append(0.)
                    else:
                        vals2.append((v - m) / s)
                vals = vals2

            return sum(vals)

        at.get_value_from = t
        return at
Example #16
def calcRsqrt(exp_pred_Val):
    """Calculates the Rsqrt of the predicted values in exp_pred_Val[1] against the 
        respective experimental values in exp_pred_Val[0]         
    """
    # Calc mean of the experimental response variable
    actualValuesList = []
    for val in exp_pred_Val:
        actualValuesList.append(val[0].value)
    testMean = statc.mean(actualValuesList)

    errSum = 0.0
    meanSum = 0.0
    for val in exp_pred_Val:
        errSum = errSum + math.pow(val[0] - string.atof(str(val[1])),2)
        meanSum = meanSum + math.pow(testMean - val[0],2)

    Rsqrt = 1 - errSum/meanSum
    return Rsqrt
Example #17
def getRsqrt(testData, predictor):
    """Calculate the coefficient of determination (R-squared) for the orange model predictor on the data set testData. 
        R^2 = 1 - sum((pred - actual)^2)/(sum((mean - actual)^2))"""

    # Calc mean of the response variable (the actual class values)
    actualValuesList = []
    for ex in testData:
        actualValuesList.append(ex[ex.domain.classVar.name])
    testMean = statc.mean(actualValuesList)

    errSum = 0.0
    meanSum = 0.0
    for ex in testData:
        errSum = errSum + math.pow(ex.getclass() - string.atof(str(predictor(ex))),2)
        meanSum = meanSum + math.pow(testMean - ex.getclass(),2)

    Rsqrt = 1 - errSum/meanSum
    return Rsqrt
Example #18
def getRsqrt(testData, predictor):
    """Calculate the coefficient of determination (R-squared) for the orange model predictor on the data set testData. 
        This uses the Test Set Activity Mean
        R^2 = 1 - sum((pred - actual)^2)/(sum((testMean - actual)^2))"""

    # Calc average of the response variable
    actualValuesList = []
    for ex in testData:
        actualValuesList.append(ex.getclass().value)
    testMean = statc.mean(actualValuesList)

    errSum = 0.0
    meanSum = 0.0
    for ex in testData:
        errSum = errSum + math.pow(ex.getclass() - string.atof(str(predictor(ex))),2)
        meanSum = meanSum + math.pow(testMean - ex.getclass(),2)

    Rsqrt = 1 - errSum/meanSum
    return Rsqrt
Example #19
    def build_feature(self, data, gs):

        at = Orange.feature.Continuous(name=str(gs))
        geneset = list(gs.genes)

        nm, name_ind, genes, takegenes, to_geneset = self._match_data(data, geneset, odic=True)

        gsi = [ name_ind[g] for g in genes ]
        gausse = compute_llr(data, gsi, self._gauss_cache)
        genes_gs = [ to_geneset[g] for g in genes ]

        if self.normalize: # per (3) in the paper
            #compute log ratios for all samples and genes from this gene set
            for i, gene_gs, g in zip(gsi, genes_gs, gausse):
                if gene_gs not in self._normalizec: #skip if computed already
                    r = [ _llrlogratio(ex[i].value, *g) for ex in data ]
                    self._normalizec[gene_gs] = (mean(r), std(r))

        def t(ex, w, genes_gs=genes_gs, gausse=gausse, normalizec=self._normalizec):
            nm2, name_ind2, genes2 = self._match_instance(ex, genes_gs, None)
            gsvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ]

            vals = [ _llrlogratio(v, *g) if v != "?" else 0.0 for v,g in zip(gsvalues, gausse) ]

            if len(normalizec): #normalize according to (3)
                vals2 = []
                for v,g in zip(vals, genes_gs):
                    m,s = normalizec[g]
                    if s == 0: #disregard attributes without differences
                        vals2.append(0.)
                    else:
                        vals2.append((v-m)/s)
                vals = vals2
            
            return sum(vals)
     
        at.get_value_from = t
        return at
Example #20
    def createStatObj(
        self,
        results=None,
        exp_pred=None,
        nTrainCmpds=None,
        nTestCmpds=None,
        responseType=None,
        nExtFolds=None,
        userAlert="",
        rocs=None,
    ):
        # Initialize res (statObj) for statistic results
        res = {}
        self.__log("Starting to create Stat Obj")
        # Classification
        res["CA"] = None
        res["CM"] = None
        res["MCC"] = None
        res["ROC"] = None
        # Regression
        res["Q2"] = None
        res["RMSE"] = None
        # Both
        res["StabilityValue"] = None
        res["userAlert"] = userAlert
        res["selected"] = False
        res["stable"] = False
        res["responseType"] = False
        res["foldStat"] = {
            "nTrainCmpds": None,
            "nTestCmpds": None,
            # Regression
            "Q2": None,
            "RMSE": None,
            # Classification
            "CM": None,
            "CA": None,
            "MCC": None,
            "ROC": None,
        }
        if results is None:  # or exp_pred is None or responseType is None or nExtFolds is None or nTestCmpds is None or nTrainCmpds is None:
            self.__log("    NONE...")
            return res
        res["responseType"] = responseType
        # Calculate the (Q2, RMSE) or (CM, CA) results depending on Classification or regression
        if responseType == "Classification":
            # Compute CA
            res["CA"] = sum(r[0] for r in results) / nExtFolds
            # Compute CM
            res["CM"] = copy.deepcopy(results[0][1])  # Get the first ConfMat
            for r in results[1:]:
                for Lidx, line in enumerate(r[1]):
                    for idx, val in enumerate(line):
                        res["CM"][Lidx][idx] = res["CM"][Lidx][idx] + val  # Add each same ConfMat position
            # Compute MCC
            res["MCC"] = evalUtilities.calcMCC(res["CM"])
            # Compute ROC
            res["ROC"] = sum(ro[0] for ro in rocs) / self.nExtFolds
            # Compute foldStat
            res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
            res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
            res["foldStat"]["CA"] = [r[0] for r in results]
            res["foldStat"]["CM"] = [r[1] for r in results]
            res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
            res["foldStat"]["ROC"] = [ro for ro in rocs]
            # Compute Stability
            res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
        else:
            # compute Q2
            res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
            # compute RMSE
            res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
            # Compute foldStat
            res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
            res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
            res["foldStat"]["RMSE"] = [r[0] for r in results]
            res["foldStat"]["Q2"] = [r[1] for r in results]
            # Compute Stability value
            res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["Q2"])
        # Evaluate stability of ML
        StabilityValue = res["StabilityValue"]
        if StabilityValue is not None:
            if responseType == "Classification":
                if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
            else:
                if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
                else:
                    stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
            if StabilityValue < stableTH:  # Select only stable models
                res["stable"] = True

        return res
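The stability gate at the end of createStatObj, in isolation; the threshold values below are illustrative, the real ones come from the AZOC.QSARSTABILITYTHRESHOLD_* constants:

def is_stable(stability_value, mean_n_test, low_th=0.05, high_th=0.1):
    # larger test sets get the stricter (lower) threshold, as above
    th = low_th if mean_n_test > 50 else high_th
    return stability_value < th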
Example #21
    def setHubs(self, i=None):
        if not i is None:
            self.hubs = i

        self.graph.tooltipNeighbours = self.hubs == 2 and self.markDistance or 0
        self.graph.markWithRed = False

        if not self.visualize or not self.visualize.graph:
            return

        hubs = self.hubs
        vgraph = self.visualize.graph

        if hubs == 0:
            return

        elif hubs == 1:
            txt = self.markSearchString
            labelText = self.graph.labelText
            self.graph.markWithRed = self.graph.nVertices > 200
            self.graph.setMarkedNodes(
                [i for i, values in enumerate(vgraph.items) if txt in " ".join([str(values[ndx]) for ndx in labelText])]
            )
            print [
                i for i, values in enumerate(vgraph.items) if txt in " ".join([str(values[ndx]) for ndx in labelText])
            ]
            return

        elif hubs == 2:
            self.graph.setMarkedNodes([])
            self.graph.tooltipNeighbours = self.markDistance
            return

        elif hubs == 3:
            self.graph.setMarkedNodes([])
            self.graph.selectionNeighbours = self.markDistance
            self.graph.markSelectionNeighbours()
            return

        self.graph.tooltipNeighbours = self.graph.selectionNeighbours = 0
        powers = vgraph.getDegrees()

        if hubs == 4:  # at least N connections
            N = self.markNConnections
            self.graph.setMarkedNodes([i for i, power in enumerate(powers) if power >= N])
        elif hubs == 5:
            N = self.markNConnections
            self.graph.setMarkedNodes([i for i, power in enumerate(powers) if power <= N])
        elif hubs == 6:
            self.graph.setMarkedNodes(
                [
                    i
                    for i, power in enumerate(powers)
                    if power > max([0] + [powers[nn] for nn in vgraph.getNeighbours(i)])
                ]
            )
        elif hubs == 7:
            self.graph.setMarkedNodes(
                [
                    i
                    for i, power in enumerate(powers)
                    if power > mean([0] + [powers[nn] for nn in vgraph.getNeighbours(i)])
                ]
            )
        elif hubs == 8:
            sortedIdx = range(len(powers))
            sortedIdx.sort(lambda x, y: -cmp(powers[x], powers[y]))
            cutP = self.markNumber
            cutPower = powers[sortedIdx[cutP]]
            while cutP < len(powers) and powers[sortedIdx[cutP]] == cutPower:
                cutP += 1
            self.graph.setMarkedNodes(sortedIdx[: cutP - 1])
Example #22
    def setHubs(self, i=None):
        if not i is None:
            self.hubs = i

        self.graph.tooltipNeighbours = self.hubs == 2 and self.markDistance or 0
        self.graph.markWithRed = False

        if not self.visualize or not self.visualize.graph:
            return

        hubs = self.hubs
        vgraph = self.visualize.graph

        if hubs == 0:
            return

        elif hubs == 1:
            txt = self.markSearchString
            labelText = self.graph.labelText
            self.graph.markWithRed = self.graph.nVertices > 200
            self.graph.setMarkedNodes([
                i for i, values in enumerate(vgraph.items)
                if txt in " ".join([str(values[ndx]) for ndx in labelText])
            ])
            print[
                i for i, values in enumerate(vgraph.items)
                if txt in " ".join([str(values[ndx]) for ndx in labelText])
            ]
            return

        elif hubs == 2:
            self.graph.setMarkedNodes([])
            self.graph.tooltipNeighbours = self.markDistance
            return

        elif hubs == 3:
            self.graph.setMarkedNodes([])
            self.graph.selectionNeighbours = self.markDistance
            self.graph.markSelectionNeighbours()
            return

        self.graph.tooltipNeighbours = self.graph.selectionNeighbours = 0
        powers = vgraph.getDegrees()

        if hubs == 4:  # at least N connections
            N = self.markNConnections
            self.graph.setMarkedNodes(
                [i for i, power in enumerate(powers) if power >= N])
        elif hubs == 5:
            N = self.markNConnections
            self.graph.setMarkedNodes(
                [i for i, power in enumerate(powers) if power <= N])
        elif hubs == 6:
            self.graph.setMarkedNodes([
                i for i, power in enumerate(powers)
                if power > max([0] +
                               [powers[nn] for nn in vgraph.getNeighbours(i)])
            ])
        elif hubs == 7:
            self.graph.setMarkedNodes([
                i for i, power in enumerate(powers)
                if power > mean([0] +
                                [powers[nn] for nn in vgraph.getNeighbours(i)])
            ])
        elif hubs == 8:
            sortedIdx = range(len(powers))
            sortedIdx.sort(lambda x, y: -cmp(powers[x], powers[y]))
            cutP = self.markNumber
            cutPower = powers[sortedIdx[cutP]]
            while cutP < len(powers) and powers[sortedIdx[cutP]] == cutPower:
                cutP += 1
            self.graph.setMarkedNodes(sortedIdx[:cutP - 1])
Example #23
    def mean(l):
        return statc.mean(l)
Example #24
def stability(res):
    mean = statc.mean(res)
    dists = [abs(x-mean) for x in res]
    return statc.mean(dists)
Example #25
def Rsqrt_obsolete(res=None):
    """
    Calculates the R-squared (Coefficient of determination) of orngTest.ExperimentResults in res
    The results res must be from a learner
    """
    # If Called without arguments, return the type of problems this method can be used for:
    # 1 - Classification problems (Discrete Class)
    # 2 - Regression problems (Continuous Class)
    # 3 - Both Regression and Classification problems (Continuous or Discrete Class)
    if res == None:
        return {"type": REGRESSION}

    if res.numberOfIterations > 1:
        Rs = [[0.0] * res.numberOfIterations
              for i in range(res.numberOfLearners)]
        errSum = [[0.0] * res.numberOfIterations
                  for i in range(res.numberOfLearners)]
        meanSum = [[0.0] * res.numberOfIterations
                   for i in range(res.numberOfLearners)]
        means = [[0.0] * res.numberOfIterations
                 for i in range(res.numberOfLearners)]
        nIter = [0] * res.numberOfIterations
        for tex in res.results:
            ac = float(tex.actualClass)
            nIter[tex.iterationNumber] += 1
            for i, cls in enumerate(tex.classes):
                means[i][tex.iterationNumber] += ac
        for nit, it in enumerate(nIter):
            for i in range(res.numberOfLearners):
                means[i][nit] /= it

        for tex in res.results:
            ac = float(tex.actualClass)
            for i, cls in enumerate(tex.classes):
                errSum[i][tex.iterationNumber] += (float(cls) - ac)**2
                meanSum[i][tex.iterationNumber] += (
                    means[i][tex.iterationNumber] - ac)**2
        for learner in range(res.numberOfLearners):
            for it in range(len(nIter)):
                if meanSum[learner][it] == 0:
                    return "N/A"
                Rs[learner][it] = 1 - (errSum[learner][it] /
                                       meanSum[learner][it])
        return [statc.mean(x) for x in Rs]

    else:
        RsqrtList = []
        for nLearner in range(len(res.results[0].classes)):
            # Calc average of the response variable
            testMean = 0
            for ex in res.results:
                testMean = testMean + ex.actualClass
            testMean = testMean / len(res.results)
            errSum = 0.0
            meanSum = 0.0
            for ex in res.results:
                errSum = errSum + math.pow(
                    ex.actualClass - ex.classes[nLearner], 2)
                meanSum = meanSum + math.pow(testMean - ex.actualClass, 2)
            if meanSum == 0:
                return "N/A"
            RsqrtList.append(1 - errSum / meanSum)
        return RsqrtList
Example #26
def stability(res):
    mean = statc.mean(res)
    dists = [abs(x - mean) for x in res]
    return statc.mean(dists)
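stability() is the mean absolute deviation from the mean; e.g. for three fold Q2 values:

# mean([0.70, 0.74, 0.66]) = 0.70
# dists = [0.00, 0.04, 0.04]
# stability([0.70, 0.74, 0.66]) -> 0.08 / 3 = 0.027 (approximately)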
Example #27
fh = open("RES_out"+os.environ["SGE_TASK_ID"]+".pkl","w")
pickle.dump(evaluateMethod(res)[0], fh)
fh.close()
"""
        # Assess the memory requirements
        memSize = dataUtilities.getApproxMemReq(dataSet)

        evalResList = sgeUtilities.arrayJob(jobName = "EvalJob", jobNumber = %(nExtFolds)s, jobParams = [learner,%(nFolds)s,dataSet,%(evalMethodFunc)s], jobQueue = "batch.q", jobScript = jobScript, memSize = str(memSize)+"M")
    else:
        for idx in range(%(nExtFolds)s):
            MyRandom = orange.RandomGenerator(1000*idx+1)
            res = %(sMethod)s
            evalResList.append(%(evalMethodFunc)s(res)[0])

    if isClassifier:
        evalRes = [round(statc.mean(evalResList),3)]
    else:
        evalRes = [round(statc.mean(evalResList),2)]
    if verbose > 0: print evalRes
else:
    res = %(sMethod)s
    evalRes = %(evalMethodFunc)s(res)

# Save intermediate result
#if os.path.exists("%(runPath)sintRes.txt"):
if [os.path.basename(f) for f in glob("%(runPath)s"+"*intRes.txt")] != []:
    tmpNew=False
else:
    tmpNew=True
#tmp=miscUtilities.lockFile("%(runPath)sintRes.txt","a")
#tmp=open("%(runPath)sintRes.txt","a")
Example #28
    def getAcc(self):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/dataset, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            If some error occurred, the respective values in the Dict will be None
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None
        # Set the response type
        self.responseType =  self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification"  or "Regression"
        self.__log("  "+str(self.responseType))

        #Create the Train and test sets
        DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds) 
        
        #Var for saving each fold's result
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}
        
        #Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models={}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  "+str([x for x in MLmethods]))

        #Check the data in advance so that it will not, by chance, fail only at the last fold!
        for foldN in range(self.nExtFolds):
            trainData = self.data.select(DataIdxs[foldN],negate=1)
            self.__checkTrainData(trainData)

        for ml in MLmethods:
          self.__log("    > "+str(ml)+"...")
          try:
            #Var for saving each fold's result
            results[ml] = []
            exp_pred[ml] = []
            models[ml] = []
            nTrainEx[ml] = []
            nTestEx[ml] = []
            logTxt = "" 
            for foldN in range(self.nExtFolds):
                if type(self.learner) == dict:
                    self.paramList = None

                trainData = self.data.select(DataIdxs[foldN],negate=1)
                testData = self.data.select(DataIdxs[foldN])
                nTrainEx[ml].append(len(trainData))
                nTestEx[ml].append(len(testData))
                #Test if train sets inside the optimizer will respect the dataSize criteria.
                #  If not, don't optimize, but still train the model
                dontOptimize = False
                if self.responseType != "Classification" and (len(trainData)*(1-1.0/self.nInnerFolds) < 20):
                    dontOptimize = True
                else:                      
                    tmpDataIdxs = dataUtilities.SeedDataSampler(trainData, self.nInnerFolds)
                    tmpTrainData = trainData.select(tmpDataIdxs[0],negate=1)
                    if not self.__checkTrainData(tmpTrainData, False):
                        dontOptimize = True

                if dontOptimize:
                    logTxt += "       Fold "+str(foldN)+": Too few compounds to optimize model hyper-parameters\n"
                    self.__log(logTxt)
                else:
                    runPath = miscUtilities.createScratchDir(baseDir = AZOC.NFS_SCRATCHDIR, desc = "AccWOptParam")
                    trainData.save(os.path.join(runPath,"trainData.tab"))

                    paramOptUtilities.getOptParam(
                        learner = MLmethods[ml], 
                        trainDataFile = os.path.join(runPath,"trainData.tab"), 
                        paramList = self.paramList, 
                        useGrid = False, 
                        verbose = self.verbose, 
                        queueType = self.queueType, 
                        runPath = runPath, 
                        nExtFolds = None, 
                        nFolds = self.nInnerFolds)
                    if not MLmethods[ml].optimized:
                        self.__log("       The learner "+str(ml)+" was not optimized.")
                        raise Exception("The learner "+str(ml)+" was not optimized.")
                    miscUtilities.removeDir(runPath) 
                #Train the model
                model = MLmethods[ml](trainData)
                models[ml].append(model)
                #Test the model
                if self.responseType == "Classification":
                    results[ml].append((evalUtilities.getClassificationAccuracy(testData, model), evalUtilities.getConfMat(testData, model) ) )
                else:
                    local_exp_pred = []
                    for ex in testData:
                        local_exp_pred.append((ex.getclass(), model(ex)))
                    results[ml].append((evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred) ) )
                    #Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred
   
            res = self.createStatObj(results[ml], exp_pred[ml], nTrainEx[ml], nTestEx[ml],self.responseType, self.nExtFolds, logTxt)
            if self.verbose > 0: 
                print "AccWOptParamGetter!Results  "+ml+":\n"
                pprint(res)
            if not res:
                raise Exception("No results available!")
            statistics[ml] = res.copy()
            self.__writeResults(statistics)
            self.__log("       OK")
          except:
            self.__log("       Learner "+str(ml)+" failed to create/optimize the model!")
            res = self.createStatObj()
            statistics[ml] = res.copy()

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            #We still need to build a consensus model out of the stable models,
            #   but ONLY if more than one model is stable!
            stableML={}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None:
                    if self.responseType == "Classification":
                        if statc.mean(statistics[modelName]["foldStat"]["nTestCmpds"]) > 50:
                            stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
                        else:
                            stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
                    else:
                        if statc.mean(statistics[modelName]["foldStat"]["nTestCmpds"]) > 50:
                            stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
                        else:
                            stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
                    if StabilityValue < stableTH:   # Select only stable models
                        stableML[modelName] = statistics[modelName].copy()
            if len(stableML) >= 2:
                self.__log("Found "+str(len(stableML))+" stable MLmethods out of "+str(len(statistics))+" MLmethods.")
                if self.responseType == "Classification":
                    CLASS0 = str(self.data.domain.classVar.values[0])
                    CLASS1 = str(self.data.domain.classVar.values[1])
                    exprTest0 = "(0"
                    for ml in stableML:
                        exprTest0 += "+( "+ml+" == "+CLASS0+" )*"+str(stableML[ml]["CA"])+" "
                    exprTest0 += ")/IF0(sum([False"
                    for ml in stableML:
                        exprTest0 += ", "+ml+" == "+CLASS0+" "
                    exprTest0 += "]),1)"
                    exprTest1 = exprTest0.replace(CLASS0,CLASS1)
                    expression = [exprTest0+" >= "+exprTest1+" -> "+CLASS0," -> "+CLASS1]
                else:
                    Q2sum = sum([stableML[ml]["Q2"] for ml in stableML])
                    expression = "(1 / "+str(Q2sum)+") * (0"
                    for ml in stableML:
                        expression += " + "+str(stableML[ml]["Q2"])+" * "+ml+" "
                    expression += ")"

                #Var for saving each fold's result
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log("Calculating the statistics for a Consensus model")
                for foldN in range(self.nExtFolds):
                    testData = self.data.select(DataIdxs[foldN])
                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in stableML:
                        consensusClassifiers[learnerName] = models[learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(classifiers = consensusClassifiers, expression = expression)     
                    CnTrainEx.append(model.NTrainEx)
                    #Test the model
                    if self.responseType == "Classification":
                        Cresults.append((evalUtilities.getClassificationAccuracy(testData, model), evalUtilities.getConfMat(testData, model) ) )
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        Cresults.append((evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred) ) )
                        #Save the experimental value and correspondent predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds)
                statistics["Consensus"] = res.copy()
                statistics["Consensus"]["IndividualStatistics"] = stableML.copy()
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics
                 
        #By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]
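An illustrative regression consensus expression as built above, for two hypothetical stable models "RF" (Q2 = 0.7) and "PLS" (Q2 = 0.5):

# Q2sum = 1.2, so:
# expression = "(1 / 1.2) * (0 + 0.7 * RF  + 0.5 * PLS )"
# i.e. a Q2-weighted average of the individual model predictions.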
Example #29
def createStatObj(results=None,
                  exp_pred=None,
                  nTrainCmpds=None,
                  nTestCmpds=None,
                  responseType=None,
                  nExtFolds=None,
                  userAlert="",
                  foldSelectedML=None):
    #Initialize res (statObj) for statistic results
    res = {}
    # Classification
    res["CA"] = None
    res["CM"] = None
    res["MCC"] = None
    #Regression
    res["Q2"] = None
    res["RMSE"] = None
    #Both
    res["StabilityValue"] = None
    res["userAlert"] = userAlert
    res["selected"] = False
    res["stable"] = False
    res["responseType"] = False
    res["foldStat"] = {
        "nTrainCmpds": None,
        "nTestCmpds": None,
        #Regression
        "Q2": None,
        "RMSE": None,
        #Classification
        "CM": None,
        "CA": None,
        "MCC": None
    }
    if not results or results is None or exp_pred is None or responseType is None or nExtFolds is None or nTestCmpds is None or nTrainCmpds is None:
        return res
    res["responseType"] = responseType
    #Calculate the (Q2, RMSE) or (CM, CA) results depending on Classification or regression
    if responseType == "Classification":
        #Compute CA
        res["CA"] = sum(r[0] for r in results) / nExtFolds
        #Compute CM
        res["CM"] = copy.deepcopy(results[0][1])  # Get the first ConfMat
        for r in results[1:]:
            for Lidx, line in enumerate(r[1]):
                for idx, val in enumerate(line):
                    res["CM"][Lidx][idx] = res["CM"][Lidx][
                        idx] + val  #Add each same ConfMat position
        #Compute MCC
        res["MCC"] = evalUtilities.calcMCC(res["CM"])
        #Compute foldStat
        res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
        res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
        res["foldStat"]["CA"] = [r[0] for r in results]
        res["foldStat"]["CM"] = [r[1] for r in results]
        res["foldStat"]["MCC"] = [evalUtilities.calcMCC(r[1]) for r in results]
        #Compute Stability
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["CA"])
    else:
        #compute Q2
        res["Q2"] = evalUtilities.calcRsqrt(exp_pred)
        #compute RMSE
        res["RMSE"] = evalUtilities.calcRMSE(exp_pred)
        #Compute foldStat
        res["foldStat"]["nTrainCmpds"] = [n for n in nTrainCmpds]
        res["foldStat"]["nTestCmpds"] = [n for n in nTestCmpds]
        res["foldStat"]["RMSE"] = [r[0] for r in results]
        res["foldStat"]["Q2"] = [r[1] for r in results]
        #Compute Stability value
        res["StabilityValue"] = evalUtilities.stability(res["foldStat"]["Q2"])

    # Save selectedMLs if passed
    if foldSelectedML:
        res["foldStat"]["foldSelectedML"] = [ml for ml in foldSelectedML]

    #Evaluate stability of ML
    StabilityValue = res["StabilityValue"]
    if StabilityValue is not None:
        if responseType == "Classification":
            if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_CLASS_H
        else:
            if statc.mean(res["foldStat"]["nTestCmpds"]) > 50:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_L
            else:
                stableTH = AZOC.QSARSTABILITYTHRESHOLD_REG_H
        if StabilityValue < stableTH:  # Select only stable models
            res["stable"] = True

    return res
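An end-to-end sketch of how these pieces fit together (names as used in the examples above):

# statistics = {ml: createStatObj(results[ml], exp_pred[ml], nTrainEx[ml],
#                                 nTestEx[ml], responseType, nExtFolds)
#               for ml in MLmethods}         # one statObj per ML method
# best = selectModel(statistics)             # Examples #1/#2 pick the winner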