# Presumed imports for the snippets in this listing (AZOrange project layout; exact
# module paths may vary between installs). MLMETHODS, PearsonRank, log and the other
# helpers referenced below come from the surrounding AZOrange code.
import os, sys, copy, time, random, string, traceback
from pprint import pprint
import numpy
import orange, orngTest, Orange
from AZutilities import dataUtilities, miscUtilities, evalUtilities, paramOptUtilities
from trainingMethods import AZorngRF, AZorngConsensus
import AZOrangeConfig as AZOC


def getAccStat(rankSumTuple, nDesc, train, randTest, extTest, resultsFid,
               projectName):

    print "Select features based on top ranked features"
    attrList = []
    # Take the nDesc top-ranked descriptors
    for elem in rankSumTuple:
        if len(attrList) < nDesc:
            attrList.append(elem[0])
        else:
            break
    train = dataUtilities.attributeSelectionData(train, attrList)
    # Drop the continuous response column kept for reference; the categorical
    # class variable remains as the response
    train = dataUtilities.attributeDeselectionData(
        train, ['HLM_XEN025;Mean;CLint (uL/min/mg);(Num)'])

    print train.domain.attributes, len(
        train.domain.attributes), train.domain.classVar

    # Get accuracies
    learners = [AZorngRF.RFLearner(nTrees=100)]
    print "CV accuracy"
    MCC_CV = printCV(train, learners, resultsFid, projectName)
    Model = learners[0](train)
    print "Random Test set accuracy"
    MCC_rand = printTestSetAcc(Model, randTest, learners, resultsFid,
                               projectName, True)
    print "External Test set accuracy"
    MCC_ext = printTestSetAcc(Model, extTest, learners, resultsFid,
                              projectName, False)

    return MCC_CV, MCC_rand, MCC_ext
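
# Hypothetical call sketch (printCV/printTestSetAcc are helpers from this project;
# rankSumTuple is a rank-sorted [(attrName, rankSum), ...] list, as built in Wrapper below):
#   MCC_CV, MCC_rand, MCC_ext = getAccStat(rankSumTuple, 50, train, randTest,
#                                          extTest, resultsFid, "MyProject")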
def buildConsensus(trainData, learners, MLMethods, logFile = None):
        log(logFile, "Building a consensus model based on optimized MLmethods: "+str([ml for ml in MLMethods])+"...")
        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
            #Expression: If  CAavg_{POS} ge CAavg_{NEG} -> POS  else -> NEG 
            #    where CAavg_{POS} is the average of classification accuracies of all models predicting POS.
            CLASS0 = str(trainData.domain.classVar.values[0])
            CLASS1 = str(trainData.domain.classVar.values[1])
            #exprTest0
            exprTest0 = "(0"
            for ml in MLMethods:
                exprTest0 += "+( "+ml+" == "+CLASS0+" )*"+str(MLMethods[ml]["optAcc"])+" "
            exprTest0 += ")/IF0(sum([False"
            for ml in MLMethods:
                exprTest0 += ", "+ml+" == "+CLASS0+" "
            exprTest0 += "]),1)"
            # exprTest1
            exprTest1 = "(0"
            for ml in MLMethods:
                exprTest1 += "+( "+ml+" == "+CLASS1+" )*"+str(MLMethods[ml]["optAcc"])+" "
            exprTest1 += ")/IF0(sum([False"
            for ml in MLMethods:
                exprTest1 += ", "+ml+" == "+CLASS1+" "
            exprTest1 += "]),1)"
            # expression
            expression = [exprTest0+" >= "+exprTest1+" -> "+CLASS0," -> "+CLASS1]
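            # Illustration (not in the original): with MLMethods {"RF": {"optAcc": 0.8},
            # "PLS": {"optAcc": 0.7}} and classes POS/NEG, exprTest0 renders (up to dict
            # ordering) as
            #   (0+( RF == POS )*0.8 +( PLS == POS )*0.7 )/IF0(sum([False, RF == POS , PLS == POS ]),1)
            # i.e. the accuracy-weighted vote for POS divided by the number of models
            # predicting POS, where IF0 presumably substitutes 1 for a zero denominator.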
        else:
            Q2sum = sum([MLMethods[ml]["optAcc"] for ml in MLMethods])
            expression = "(1 / "+str(Q2sum)+") * (0"
            for ml in MLMethods:
                expression += " + "+str(MLMethods[ml]["optAcc"])+" * " + ml +" "
            expression += ")" 

        consensusLearners = {}
        for learnerName in learners:
            consensusLearners[learnerName] = learners[learnerName]
        
        learner = AZorngConsensus.ConsensusLearner(learners = consensusLearners, expression = expression)
        log(logFile, "  Training Consensus Learner")
        smilesAttr = dataUtilities.getSMILESAttr(trainData)
        if smilesAttr:
            log(logFile,"Found SMILES attribute:"+smilesAttr)
            if learner.specialType == 1:
               trainData = dataUtilities.attributeSelectionData(trainData, [smilesAttr, trainData.domain.classVar.name])
               log(logFile,"Selected attrs: "+str([attr.name for attr in trainData.domain]))
            else:
               trainData = dataUtilities.attributeDeselectionData(trainData, [smilesAttr])
               log(logFile,"Selected attrs: "+str([attr.name for attr in trainData.domain[0:3]] + ["..."] +\
                                              [attr.name for attr in trainData.domain[len(trainData.domain)-3:]]))

        return learner(trainData)
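
# Note (illustration, not original code): for regression the expression is a single
# accuracy-weighted average string; with optAcc values {"RF": 0.8, "PLS": 0.7} it
# renders as "(1 / 1.5) * (0 + 0.8 * RF  + 0.7 * PLS )", whereas for classification
# it is a list of "condition -> class" rules as built above.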
Example #3
def filterDesc(data, zeroFracT = 0.95, LowVarT = 0.01, HighCorrT = 0.95):

    print "Initial number of descriptors ", len(data.domain.attributes)

    # Find the descriptors for which the fraction of zeros is smaller than zeroFracT - keep these
    attrList = []
    rmAttrList = []
    for attr in data.domain.attributes:
        valueList = []
        nZero = 0
        for ex in data:
            value = ex[attr.name].value
            if value == 0:
                nZero = nZero + 1
            valueList.append(value)
        zeroFrac = float(nZero)/len(valueList)
        if zeroFrac < zeroFracT:
            attrList.append(attr.name)
        else:
            rmAttrList.append(attr.name)
    print "Descriptors deselected because of a large fraction of zeros: "
    print rmAttrList
    data = dataUtilities.attributeSelectionData(data, attrList)
    print "Remaining number of descriptors ", len(data.domain.attributes)

    # Filter descriptors based on normalized variance
    rmAttrList = []
    for attr in data.domain.attributes:
        valueList = []
        for ex in data:
            value = ex[attr.name].value
            valueList.append(value)
        variance = numpy.var(valueList)
        mean = numpy.mean(valueList)
        if mean == 0:
            # Guard added: variance/mean is undefined for a zero mean, so such
            # descriptors are deselected here (assumption on intended behaviour)
            rmAttrList.append(attr.name)
            continue
        normVar = variance/mean
        if normVar < LowVarT:
            rmAttrList.append(attr.name)
        
    print "Descriptors deselected because of low variance "
    print rmAttrList
    data = dataUtilities.attributeDeselectionData(data, rmAttrList)
    print "Remaining number of descriptors ", len(data.domain.attributes)
    
    print "Correlation filter not implemented yet"

    return data
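
# The correlation filter above is left unimplemented. A minimal sketch of what it
# could look like (an assumption, not part of the original code): drop one descriptor
# from every pair whose absolute Pearson correlation exceeds HighCorrT. Uses the
# numpy and dataUtilities imports from the top of this listing.
def filterCorrDesc(data, HighCorrT=0.95):
    attrNames = [attr.name for attr in data.domain.attributes]
    # Build an (nExamples x nAttrs) matrix of descriptor values
    valueMat = numpy.array([[ex[name].value for name in attrNames] for ex in data])
    # Attribute-by-attribute correlation matrix (constant columns give nan and are kept)
    corr = numpy.corrcoef(valueMat, rowvar=0)
    rmAttrList = []
    for i in range(len(attrNames)):
        for j in range(i + 1, len(attrNames)):
            if abs(corr[i][j]) > HighCorrT and attrNames[j] not in rmAttrList:
                # Keep the first descriptor of each highly correlated pair
                rmAttrList.append(attrNames[j])
    print "Descriptors deselected because of high correlation "
    print rmAttrList
    return dataUtilities.attributeDeselectionData(data, rmAttrList)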
Example #4
def buildModel(trainData,
               MLMethod,
               queueType="NoSGE",
               verbose=0,
               logFile=None):
    """
        Build the method passed in MLMethod and optimize it ("IndividualStatistics" not in MLMethod).
        If MLMethod is a Consensus ("IndividualStatistics" in MLMethod), build and optimize all
        individual models first, and only then build the consensus!
        """
    log(logFile,
        "Building and optimizing learner: " + MLMethod["MLMethod"] + "...")
    learners = {}
    MLMethods = {}
    if "IndividualStatistics" in MLMethod:  #It is a consensus and will certaily not contain any
        #special model as it was filtered in the getUnbiasedAcc
        for ML in MLMethod["IndividualStatistics"]:
            MLMethods[ML] = copy.deepcopy(MLMethod["IndividualStatistics"][ML])
    else:
        ML = MLMethod["MLMethod"]
        if MLMETHODS[ML](
                name=ML
        ).specialType == 1:  # If it is a special model with a built-in optimizer
            log(logFile, "       This is a special model")
            smilesAttr = dataUtilities.getSMILESAttr(trainData)
            if smilesAttr:
                log(logFile, "Found SMILES attribute:" + smilesAttr)
                trainData = dataUtilities.attributeSelectionData(
                    trainData, [smilesAttr, trainData.domain.classVar.name])
            optInfo, SpecialModel = MLMETHODS[ML](name=ML).optimizePars(
                trainData, folds=5)
            return SpecialModel
        else:
            MLMethods[MLMethod["MLMethod"]] = MLMethod

    smilesAttr = dataUtilities.getSMILESAttr(trainData)
    if smilesAttr:
        trainData = dataUtilities.attributeDeselectionData(
            trainData, [smilesAttr])

    # optimize all MLMethods
    for ML in MLMethods:
        log(logFile, "  Optimizing MLmethod: " + ML)
        learners[ML] = MLMETHODS[ML](name=ML)

        runPath = miscUtilities.createScratchDir(
            baseDir=AZOC.NFS_SCRATCHDIR, desc="competitiveWorkflow_BuildModel")
        trainData.save(os.path.join(runPath, "trainData.tab"))

        tunedPars = paramOptUtilities.getOptParam(learner=learners[ML],
                                                  trainDataFile=os.path.join(
                                                      runPath,
                                                      "trainData.tab"),
                                                  useGrid=False,
                                                  verbose=verbose,
                                                  queueType=queueType,
                                                  runPath=runPath,
                                                  nExtFolds=None,
                                                  logFile=logFile,
                                                  getTunedPars=True)

        if not learners[ML].optimized:
            print "WARNING: competitiveWorkflow: The learner " + str(
                learners[ML]) + " was not optimized."
            #print "         Using default parameters"
            print "         The " + str(learners[ML]) + " will not be included"
            #print "         Returning None"
            print "             DEBUG can be made in: " + runPath
            #Setting default parameters
            #learners[ML] = learners[ML].__class__()
            #return None
            learners.pop(ML)
            continue
        else:
            print "Optimized learner ", learners[ML]
            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                MLMethods[ML]["optAcc"] = tunedPars[0]
            else:
                res = orngTest.crossValidation(
                    [learners[ML]],
                    trainData,
                    folds=5,
                    strat=orange.MakeRandomIndices.StratifiedIfPossible,
                    randomGenerator=random.randint(0, 100))
                R2 = evalUtilities.R2(res)[0]
                MLMethods[ML]["optAcc"] = R2
            miscUtilities.removeDir(runPath)
    #Train the model
    if len(learners) == 1:
        log(logFile, "  Building the model:" + learners.keys()[0])
        model = learners[learners.keys()[0]](trainData)
    elif len(learners) > 1:
        model = buildConsensus(trainData, learners, MLMethods)
    else:
        print "ERROR: No Learners were selected!"
        return None

    return model
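
# A hypothetical call sketch (names below are illustrative, not from the original).
# buildModel expects the MLMethod dict produced by the accuracy/selection step:
#   MLMethod = {"MLMethod": "RF", ...}                                   # single learner
#   MLMethod = {"MLMethod": "Consensus",
#               "IndividualStatistics": {"RF": {...}, "PLS": {...}}}     # consensus
#   model = buildModel(trainData, MLMethod, queueType="NoSGE", verbose=0)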
Example #5
def buildConsensus(trainData, learners, MLMethods, logFile=None):
    log(
        logFile, "Building a consensus model based on optimized MLmethods: " +
        str([ml for ml in MLMethods]) + "...")
    if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
        #Expression: If  CAavg_{POS} ge CAavg_{NEG} -> POS  else -> NEG
        #    where CAavg_{POS} is the average of classification accuracies of all models predicting POS.
        CLASS0 = str(trainData.domain.classVar.values[0])
        CLASS1 = str(trainData.domain.classVar.values[1])
        #exprTest0
        exprTest0 = "(0"
        for ml in MLMethods:
            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(
                MLMethods[ml]["optAcc"]) + " "
        exprTest0 += ")/IF0(sum([False"
        for ml in MLMethods:
            exprTest0 += ", " + ml + " == " + CLASS0 + " "
        exprTest0 += "]),1)"
        # exprTest1
        exprTest1 = "(0"
        for ml in MLMethods:
            exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str(
                MLMethods[ml]["optAcc"]) + " "
        exprTest1 += ")/IF0(sum([False"
        for ml in MLMethods:
            exprTest1 += ", " + ml + " == " + CLASS1 + " "
        exprTest1 += "]),1)"
        # expression
        expression = [
            exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1
        ]
    else:
        Q2sum = sum([MLMethods[ml]["optAcc"] for ml in MLMethods])
        expression = "(1 / " + str(Q2sum) + ") * (0"
        for ml in MLMethods:
            expression += " + " + str(
                MLMethods[ml]["optAcc"]) + " * " + ml + " "
        expression += ")"

    consensusLearners = {}
    for learnerName in learners:
        consensusLearners[learnerName] = learners[learnerName]

    learner = AZorngConsensus.ConsensusLearner(learners=consensusLearners,
                                               expression=expression)
    log(logFile, "  Training Consensus Learner")
    smilesAttr = dataUtilities.getSMILESAttr(trainData)
    if smilesAttr:
        log(logFile, "Found SMILES attribute:" + smilesAttr)
        if learner.specialType == 1:
            trainData = dataUtilities.attributeSelectionData(
                trainData, [smilesAttr, trainData.domain.classVar.name])
            log(
                logFile, "Selected attrs: " +
                str([attr.name for attr in trainData.domain]))
        else:
            trainData = dataUtilities.attributeDeselectionData(
                trainData, [smilesAttr])
            log(logFile,"Selected attrs: "+str([attr.name for attr in trainData.domain[0:3]] + ["..."] +\
                                           [attr.name for attr in trainData.domain[len(trainData.domain)-3:]]))

    return learner(trainData)
    print "Number of attributes ", nAttr
    print "Maximum number of desc combinations ", pow(2, nAttr)
    print "Ndesc must be lower than the max number of desc combinations"
    print NdescComb

    # Randomly sample Ndesc combinations
    attrList = getDescComb(data, nAttr, NdescComb)

    # Rank the accuracy of each descriptor by averaging the accuracy of all models including a descriptor

    # Select all descriptors above median accuracy and repeat the random sampling of desc combinations

    return attrList


if __name__ == "__main__":

    dataFile = "trainDataAllEP.txt"
    data = dataUtilities.DataTable(dataFile)

    attrList = [
        "IT03423_Seq_BF", "hERG_IW_pIC50", "IT03423_BF", "IT03423_perc101_BF",
        "Caco2_intrinsic", "ACDlogD74", "Conc_QTc", "IT03713_BF", "IT10850_BF",
        "IT22015_BF", "IT22016_BF"
    ]
    data = dataUtilities.attributeSelectionData(data, attrList)

    NdescComb = 100  # Number of desc combinations to sample in the first iteration
    attrList = descSelection(data, NdescComb)
    print attrList
    def getAcc(self, callBack = None, callBackWithFoldModel = None):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None
        # Set the response type
        self.responseType =  self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification"  or "Regression"
        self.__log("  "+str(self.responseType))

        #Create the Train and test sets
        if self.usePreDefFolds:
            DataIdxs = self.preDefIndices 
        else:
            DataIdxs = self.sampler(self.data, self.nExtFolds) 
        foldsN = [f for f in dict.fromkeys(DataIdxs) if f != 0] #Folds used only from 1 on ... 0 are for fixed train Bias
        nFolds = len(foldsN)
        #Fix the Indexes based on DataIdxs
        # (0s) represents the train set  ( >= 1s) represents the test set folds
        if self.useVarCtrlCV:
            nShifted = [0] * nFolds
            for idx,isTest in enumerate(self.preDefIndices):  # self.preDefIndices == 0 are to be used in TrainBias
                if not isTest:
                    if DataIdxs[idx]:
                        nShifted[DataIdxs[idx]] += 1
                        DataIdxs[idx] = 0
            for idx,shift in enumerate(nShifted):
                self.__log("In fold "+str(idx)+", "+str(shift)+" examples were shifted to the train set.")

        #Var for saving each fold's result
        optAcc = {}
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}
        
        #Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models={}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  "+str([x for x in MLmethods]))

        #Check data in advance so that it will not, by chance, fail at the last fold!
        for foldN in foldsN:
            trainData = self.data.select(DataIdxs,foldN,negate=1)
            self.__checkTrainData(trainData)

        #Optional!!
        # Order Learners so that PLS is the first
        sortedML = [ml for ml in MLmethods]
        if "PLS" in sortedML:
            sortedML.remove("PLS")
            sortedML.insert(0,"PLS")

        stepsDone = 0
        nTotalSteps = len(sortedML) * self.nExtFolds  
        for ml in sortedML:
          startTime = time.time()
          self.__log("    > "+str(ml)+"...")
          try:
            #Var for saving each fold's result
            results[ml] = []
            exp_pred[ml] = []
            models[ml] = []
            nTrainEx[ml] = []
            nTestEx[ml] = []
            optAcc[ml] = []
            logTxt = ""
            for foldN in foldsN:
                if type(self.learner) == dict:
                    self.paramList = None

                trainData = self.data.select(DataIdxs,foldN,negate=1)
                testData = self.data.select(DataIdxs,foldN)
                smilesAttr = dataUtilities.getSMILESAttr(trainData)
                if smilesAttr:
                    self.__log("Found SMILES attribute:"+smilesAttr)
                    if MLmethods[ml].specialType == 1:
                       trainData = dataUtilities.attributeSelectionData(trainData, [smilesAttr, trainData.domain.classVar.name]) 
                       testData = dataUtilities.attributeSelectionData(testData, [smilesAttr, testData.domain.classVar.name]) 
                       self.__log("Selected attrs: "+str([attr.name for attr in trainData.domain]))
                    else:
                       trainData = dataUtilities.attributeDeselectionData(trainData, [smilesAttr]) 
                       testData = dataUtilities.attributeDeselectionData(testData, [smilesAttr]) 
                       self.__log("Selected attrs: "+str([attr.name for attr in trainData.domain[0:3]] + ["..."] + [attr.name for attr in trainData.domain[len(trainData.domain)-3:]]))

                nTrainEx[ml].append(len(trainData))
                nTestEx[ml].append(len(testData))
                #Test if trainsets inside optimizer will respect dataSize criteria.
                #  if not, don't optimize, but still train the model
                dontOptimize = False
                if self.responseType != "Classification" and (len(trainData)*(1-1.0/self.nInnerFolds) < 20):
                    dontOptimize = True
                else:                      
                    tmpDataIdxs = self.sampler(trainData, self.nInnerFolds)
                    tmpTrainData = trainData.select(tmpDataIdxs,1,negate=1)
                    if not self.__checkTrainData(tmpTrainData, False):
                        dontOptimize = True

                SpecialModel = None
                if dontOptimize:
                    logTxt += "       Fold "+str(foldN)+": Too few compounds to optimize model hyper-parameters\n"
                    self.__log(logTxt)
                    if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                        res = evalUtilities.crossValidation([MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices.StratifiedIfPossible, random_generator = random.randint(0, 100))
                        CA = evalUtilities.CA(res)[0]
                        optAcc[ml].append(CA)
                    else:
                        res = evalUtilities.crossValidation([MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices.StratifiedIfPossible, random_generator = random.randint(0, 100))
                        R2 = evalUtilities.R2(res)[0]
                        optAcc[ml].append(R2)
                else:
                    if MLmethods[ml].specialType == 1: 
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                    optInfo, SpecialModel = MLmethods[ml].optimizePars(trainData, folds = 5)
                                    optAcc[ml].append(optInfo["Acc"])
                            else:
                                    res = evalUtilities.crossValidation([MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices.StratifiedIfPossible, random_generator = random.randint(0, 100))
                                    R2 = evalUtilities.R2(res)[0]
                                    optAcc[ml].append(R2)
                    else:
                            runPath = miscUtilities.createScratchDir(baseDir = AZOC.NFS_SCRATCHDIR, desc = "AccWOptParam", seed = id(trainData))
                            trainData.save(os.path.join(runPath,"trainData.tab"))
                            tunedPars = paramOptUtilities.getOptParam(
                                learner = MLmethods[ml], 
                                trainDataFile = os.path.join(runPath,"trainData.tab"), 
                                paramList = self.paramList, 
                                useGrid = False, 
                                verbose = self.verbose, 
                                queueType = self.queueType, 
                                runPath = runPath, 
                                nExtFolds = None, 
                                nFolds = self.nInnerFolds,
                                logFile = self.logFile,
                                getTunedPars = True,
                                fixedParams = self.fixedParams)
                            if not MLmethods[ml] or not MLmethods[ml].optimized:
                                self.__log("       WARNING: GETACCWOPTPARAM: The learner "+str(ml)+" was not optimized.")
                                self.__log("                It will be ignored")
                                #self.__log("                It will be set to default parameters")
                                self.__log("                    DEBUG can be done in: "+runPath)
                                #Set learner back to default 
                                #MLmethods[ml] = MLmethods[ml].__class__()
                                raise Exception("The learner "+str(ml)+" was not optimized.")
                            else:
                                if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                    optAcc[ml].append(tunedPars[0])
                                else:
                                    res = evalUtilities.crossValidation([MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices.StratifiedIfPossible, random_generator = random.randint(0, 100))
                                    R2 = evalUtilities.R2(res)[0]
                                    optAcc[ml].append(R2)

                                miscUtilities.removeDir(runPath) 
                #Train the model
                if SpecialModel is not None:
                    model = SpecialModel 
                else:
                    model = MLmethods[ml](trainData)
                models[ml].append(model)
                #Test the model
                if self.responseType == "Classification":
                    results[ml].append((evalUtilities.getClassificationAccuracy(testData, model), evalUtilities.getConfMat(testData, model) ) )
                else:
                    local_exp_pred = []
                    # Predict using bulk-predict
                    predictions = model(testData)
                    # Gather predictions
                    for n,ex in enumerate(testData):
                        local_exp_pred.append((ex.getclass().value, predictions[n].value))
                    results[ml].append((evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred) ) )
                    #Save the experimental value and correspondent predicted value
                    exp_pred[ml] += local_exp_pred
                if callBack:
                     stepsDone += 1
                     if not callBack((100*stepsDone)/nTotalSteps): return None
                if callBackWithFoldModel:
                    callBackWithFoldModel(model) 

            res = self.createStatObj(results[ml], exp_pred[ml], nTrainEx[ml], nTestEx[ml],self.responseType, self.nExtFolds, logTxt, labels = hasattr(self.data.domain.classVar,"values") and list(self.data.domain.classVar.values) or None )
            if self.verbose > 0: 
                print "UnbiasedAccuracyGetter!Results  "+ml+":\n"
                pprint(res)
            if not res:
                raise Exception("No results available!")
            res["runningTime"] = time.time() - startTime
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)
            self.__log("       OK")
          except:
            self.__log("       Learner "+str(ml)+" failed to create/optimize the model!")
            error = str(sys.exc_info()[0]) +" "+\
                        str(sys.exc_info()[1]) +" "+\
                        str(traceback.extract_tb(sys.exc_info()[2]))
            self.__log(error)
 
            res = self.createStatObj()
            statistics[ml] = copy.deepcopy(res)
            self.__writeResults(statistics)

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            #We still need to build a consensus model out of the stable models 
            #   ONLY if there is more than one stable model!
            #   When only one or no stable models, build a consensus based on all models
            # ALWAYS exclude specialType models (MLmethods[ml].specialType > 0)
            consensusMLs={}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None and statistics[modelName]["stable"]:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            self.__log("Found "+str(len(consensusMLs))+" stable MLmethods out of "+str(len(statistics))+" MLmethods.")

            if len(consensusMLs) <= 1:   # we need more models to build a consensus!
                consensusMLs={}
                for modelName in statistics:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            # Exclude specialType models 
            excludeThis = []
            for learnerName in consensusMLs:
                if models[learnerName][0].specialType > 0:
                    excludeThis.append(learnerName)
            for learnerName in excludeThis:
                consensusMLs.pop(learnerName)
                self.__log("    > Excluded special model " + learnerName)
            self.__log("    > Stable modules: " + str(consensusMLs.keys()))

            if len(consensusMLs) >= 2:
                #Var for saving each fold's result
                startTime = time.time()
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log("Calculating the statistics for a Consensus model based on "+str([ml for ml in consensusMLs]))
                for foldN in range(self.nExtFolds):
                    if self.responseType == "Classification":
                        CLASS0 = str(self.data.domain.classVar.values[0])
                        CLASS1 = str(self.data.domain.classVar.values[1])
                        # exprTest0
                        exprTest0 = "(0"
                        for ml in consensusMLs:
                            exprTest0 += "+( "+ml+" == "+CLASS0+" )*"+str(optAcc[ml][foldN])+" "
                        exprTest0 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest0 += ", "+ml+" == "+CLASS0+" "
                        exprTest0 += "]),1)"
                        # exprTest1
                        exprTest1 = "(0"
                        for ml in consensusMLs:
                            exprTest1 += "+( "+ml+" == "+CLASS1+" )*"+str(optAcc[ml][foldN])+" "
                        exprTest1 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest1 += ", "+ml+" == "+CLASS1+" "
                        exprTest1 += "]),1)"
                        # Expression
                        expression = [exprTest0+" >= "+exprTest1+" -> "+CLASS0," -> "+CLASS1]
                    else:
                        Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                        expression = "(1 / "+str(Q2sum)+") * (0"
                        for ml in consensusMLs:
                            expression += " + "+str(optAcc[ml][foldN])+" * "+ml+" "
                        expression += ")"

                    testData = self.data.select(DataIdxs,foldN+1)  # fold 0 is for the train Bias!!
                    smilesAttr = dataUtilities.getSMILESAttr(testData)
                    if smilesAttr:
                        self.__log("Found SMILES attribute:"+smilesAttr)
                        testData = dataUtilities.attributeDeselectionData(testData, [smilesAttr])
                        self.__log("Selected attrs: "+str([attr.name for attr in trainData.domain[0:3]] + ["..."] + [attr.name for attr in trainData.domain[len(trainData.domain)-3:]]))

                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in consensusMLs:
                        consensusClassifiers[learnerName] = models[learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(classifiers = consensusClassifiers, expression = expression)     
                    CnTrainEx.append(model.NTrainEx)
                    #Test the model
                    if self.responseType == "Classification":
                        Cresults.append((evalUtilities.getClassificationAccuracy(testData, model), evalUtilities.getConfMat(testData, model) ) )
                    else:
                        local_exp_pred = []
                        # Predict using bulk-predict
                        predictions = model(testData)
                        # Gather predictions
                        for n,ex in enumerate(testData):
                            local_exp_pred.append((ex.getclass().value, predictions[n].value))
                        Cresults.append((evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred) ) )
                        #Save the experimental value and correspondent predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds, labels = hasattr(self.data.domain.classVar,"values") and list(self.data.domain.classVar.values) or None )
                res["runningTime"] = time.time() - startTime
                statistics["Consensus"] = copy.deepcopy(res)
                statistics["Consensus"]["IndividualStatistics"] = copy.deepcopy(consensusMLs)
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics
                 
        #By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]
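
    # Hypothetical usage sketch (the class name is assumed from the
    # "UnbiasedAccuracyGetter!Results" tag printed above; verify against the real class):
    #   accGetter = UnbiasedAccuracyGetter(data=train,
    #                                      learner={"RF": rfLearner, "PLS": plsLearner},
    #                                      nExtFolds=5, nInnerFolds=5)
    #   statistics = accGetter.getAcc()
    #   print statistics["RF"]["CA"], statistics["Consensus"]["CA"]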
Example #8
def Wrapper(train, randTest, extTest, resultsFid, projectName, MCCdict, descList):

    # Select RDKit desc and keep CLint for reference
    attrList = []
    fid = open("RDKitDesc.txt")
    for line in fid:
        attrList.append(string.strip(line))
    fid.close()    
    #attrList = attrList + ["CLint"] 
    attrList = attrList + ["HLM_XEN025;Mean;CLint (uL/min/mg);(Num)"]
    train = dataUtilities.attributeSelectionData(train, attrList)

    print "Ranking with contiuous response"
    train = changeRespVar(train, "HLM_XEN025;Mean;CLint (uL/min/mg);(Num)")
    print "Cont resp ", train.domain.classVar

    print "Pearson ranking"
    scoreTuple = PearsonRank.getRankedAbs(train, 178)
    tuplePrint(scoreTuple, 20)
    rankTuple = getRank(scoreTuple)
    rankSumTuple = rankTuple
    #tuplePrint(rankSumTuple, 50)
    #tuplePrint(rankTuple, 20)

    print "ReliefF ranking "
    relief = Orange.feature.scoring.Relief(k=20, m=50)
    scoreTuple = Orange.feature.scoring.score_all(train, score=relief)
    #tuplePrint(scoreTuple, 20)
    rankTuple = getRank(scoreTuple)
#    rankSumTuple = rankTuple
    rankSumTuple = getRankSum(rankSumTuple, rankTuple)
#    tuplePrint(rankSumTuple, 50)
#    tuplePrint(rankTuple, 20)
#
    print "Ranking with categorical response"
    train = changeRespVar(train, "CLint")
    print "Cat resp ", train.domain.classVar
#
    print "ReliefF ranking "
    relief = Orange.feature.scoring.Relief(k=20, m=50)
    scoreTuple = Orange.feature.scoring.score_all(train, score=relief)
    tuplePrint(scoreTuple, 20)
    rankTuple = getRank(scoreTuple)
#    print "OBS!!!!!!!!!!!! Remove if already done"
#    rankSumTuple = rankTuple
    rankSumTuple = getRankSum(rankSumTuple, rankTuple)
    #tuplePrint(rankTuple, 20)
#
    print "MDL ranking "
    score = Orange.feature.scoring.MDL()
    scoreTuple = Orange.feature.scoring.score_all(train, score)
    tuplePrint(scoreTuple, 20)
    rankTuple = getRank(scoreTuple)
#    rankSumTuple = rankTuple
    rankSumTuple = getRankSum(rankSumTuple, rankTuple)
#    #tuplePrint(rankTuple, 20)
#    #tuplePrint(rankSumTuple, 50)
#
    # Return average rank in sorted list
    rankSumTuple = sortRankSum(rankSumTuple, 1)
    tuplePrint(rankSumTuple, 50)
    #print rankSumTuple

    for nDesc in descList:
        MCC_CV, MCC_rand, MCC_ext = getAccStat(rankSumTuple, nDesc, train, randTest, extTest, resultsFid, projectName)
        MCCdict[projectName][str(nDesc)] = [MCC_CV, MCC_rand, MCC_ext]

    return MCCdict
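
# The helpers used above (getRank, getRankSum, sortRankSum) are not shown in this
# listing. A minimal sketch of the rank-sum aggregation they appear to implement
# (an assumption, for illustration only):
def rankSumSketch(scoreTuplesPerMethod):
    """scoreTuplesPerMethod: one [(attrName, score), ...] list per scoring method.
       Returns attrNames sorted by the sum of their per-method ranks (lower is better)."""
    rankSum = {}
    for scores in scoreTuplesPerMethod:
        ordered = sorted(scores, key=lambda t: t[1], reverse=True)  # best score first
        for rank, (name, score) in enumerate(ordered):
            rankSum[name] = rankSum.get(name, 0) + rank
    return sorted(rankSum, key=rankSum.get)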
def descSelection(data, NdescComb):

    nAttr = len(data.domain.attributes)
    print "Number of attributes ", nAttr
    print "Maximum number of desc combinations ", pow(2, nAttr)
    print "Ndesc must be lower than the max number of desc combinations"
    print NdescComb

    # Randomly sample Ndesc combinations
    attrList = getDescComb(data, nAttr, NdescComb)

    # Rank the accuracy of each descriptor by averaging the accuracy of all models including a descriptor

    # Select all descriptors above median accuracy and repeat the random sampling of desc combinations

    return attrList
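
# A sketch of the two TODO steps above (assumed behaviour, not original code): average
# the model accuracy over all sampled combinations containing each descriptor, then
# keep the descriptors at or above the median average accuracy for the next iteration.
def selectAboveMedian(combAccList):
    """combAccList: [(attrNameList, accuracy), ...], one entry per sampled combination."""
    descAcc = {}
    for attrNames, acc in combAccList:
        for name in attrNames:
            descAcc.setdefault(name, []).append(acc)
    avgAcc = dict((name, sum(accs) / float(len(accs))) for name, accs in descAcc.items())
    median = sorted(avgAcc.values())[len(avgAcc) / 2]  # simple median (Py2 integer division)
    return [name for name in avgAcc if avgAcc[name] >= median]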


if __name__ == "__main__":

    dataFile = "trainDataAllEP.txt"
    data = dataUtilities.DataTable(dataFile)

    attrList = ["IT03423_Seq_BF", "hERG_IW_pIC50", "IT03423_BF", "IT03423_perc101_BF", "Caco2_intrinsic", "ACDlogD74", "Conc_QTc", "IT03713_BF", "IT10850_BF", "IT22015_BF", "IT22016_BF"]
    data = dataUtilities.attributeSelectionData(data, attrList)

    NdescComb = 100  # Number of desc combinations to sample in the first iteration
    attrList = descSelection(data, NdescComb)
    print attrList
def buildModel(trainData, MLMethod, queueType = "NoSGE", verbose = 0, logFile = None):
        """
        Build the method passed in MLMethod and optimize it ("IndividualStatistics" not in MLMethod).
        If MLMethod is a Consensus ("IndividualStatistics" in MLMethod), build and optimize all
        individual models first, and only then build the consensus!
        """
        log(logFile, "Building and optimizing learner: "+MLMethod["MLMethod"]+"...")
        learners = {}
        MLMethods = {}
        if "IndividualStatistics"  in MLMethod:         #It is a consensus and will certaily not contain any 
                                                        #special model as it was filtered in the getUnbiasedAcc
            for ML in MLMethod["IndividualStatistics"]:
                MLMethods[ML] = copy.deepcopy(MLMethod["IndividualStatistics"][ML])
        else:
            ML = MLMethod["MLMethod"]
            if MLMETHODS[ML](name = ML).specialType == 1:  # If it is a special model with a built-in optimizer
                log(logFile, "       This is a special model")
                smilesAttr = dataUtilities.getSMILESAttr(trainData)
                if smilesAttr:
                    log(logFile,"Found SMILES attribute:"+smilesAttr)
                    trainData = dataUtilities.attributeSelectionData(trainData, [smilesAttr, trainData.domain.classVar.name])
                optInfo, SpecialModel = MLMETHODS[ML](name = ML).optimizePars(trainData, folds = 5)
                return SpecialModel
            else:
                MLMethods[MLMethod["MLMethod"]] = MLMethod

        smilesAttr = dataUtilities.getSMILESAttr(trainData)
        if smilesAttr:
            trainData = dataUtilities.attributeDeselectionData(trainData, [smilesAttr])

        # optimize all MLMethods
        for ML in MLMethods:
            log(logFile, "  Optimizing MLmethod: "+ML)
            learners[ML] = MLMETHODS[ML](name = ML)

            runPath = miscUtilities.createScratchDir(baseDir = AZOC.NFS_SCRATCHDIR, desc = "competitiveWorkflow_BuildModel")
            trainData.save(os.path.join(runPath,"trainData.tab"))

            tunedPars = paramOptUtilities.getOptParam(
                learner = learners[ML],
                trainDataFile = os.path.join(runPath,"trainData.tab"),
                useGrid = False,
                verbose = verbose,
                queueType = queueType,
                runPath = runPath,
                nExtFolds = None,
                logFile = logFile,
                getTunedPars = True)

            
            if not learners[ML].optimized:
                print "WARNING: competitiveWorkflow: The learner "+str(learners[ML])+" was not optimized."
                #print "         Using default parameters"
                print "         The "+str(learners[ML])+" will not be included"
                #print "         Returning None"
                print "             DEBUG can be made in: "+runPath 
                #Setting default parameters
                #learners[ML] = learners[ML].__class__()   
                #return None
                learners.pop(ML)
                continue
            else:
                print "Optimized learner ",learners[ML]      
                if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                    MLMethods[ML]["optAcc"] = tunedPars[0] 
                else:
                    res = orngTest.crossValidation([learners[ML]], trainData, folds=5, strat=orange.MakeRandomIndices.StratifiedIfPossible, randomGenerator = random.randint(0, 100))
                    R2 = evalUtilities.R2(res)[0]  
                    MLMethods[ML]["optAcc"] = R2
                miscUtilities.removeDir(runPath)
        #Train the model
        if len(learners) == 1:
            log(logFile, "  Building the model:"+learners.keys()[0])
            model = learners[learners.keys()[0]](trainData)
        elif len(learners) > 1:
            model = buildConsensus(trainData,learners,MLMethods)  
        else:
            print "ERROR: No Learners were selected!"
            return None

        return model
Example #11
    def getAcc(self, callBack=None, callBackWithFoldModel=None):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None
        # Set the response type
        self.responseType = self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
        self.__log("  " + str(self.responseType))

        #Create the Train and test sets
        if self.usePreDefFolds:
            DataIdxs = self.preDefIndices
        else:
            DataIdxs = self.sampler(self.data, self.nExtFolds)
        foldsN = [f for f in dict.fromkeys(DataIdxs) if f != 0
                  ]  #Folds used only from 1 on ... 0 are for fixed train Bias
        nFolds = len(foldsN)
        #Fix the Indexes based on DataIdxs
        # (0s) represents the train set  ( >= 1s) represents the test set folds
        if self.useVarCtrlCV:
            nShifted = [0] * nFolds
            for idx, isTest in enumerate(
                    self.preDefIndices
            ):  # self.preDefIndices == 0 are to be used in TrainBias
                if not isTest:
                    if DataIdxs[idx]:
                        nShifted[DataIdxs[idx]] += 1
                        DataIdxs[idx] = 0
            for idx, shift in enumerate(nShifted):
                self.__log("In fold " + str(idx) + ", " + str(shift) +
                           " examples were shifted to the train set.")

        #Var for saving each fold's result
        optAcc = {}
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}

        #Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models = {}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  " + str([x for x in MLmethods]))

        #Check data in advance so that it will not, by chance, fail at the last fold!
        for foldN in foldsN:
            trainData = self.data.select(DataIdxs, foldN, negate=1)
            self.__checkTrainData(trainData)

        #Optional!!
        # Order Learners so that PLS is the first
        sortedML = [ml for ml in MLmethods]
        if "PLS" in sortedML:
            sortedML.remove("PLS")
            sortedML.insert(0, "PLS")

        stepsDone = 0
        nTotalSteps = len(sortedML) * self.nExtFolds
        for ml in sortedML:
            startTime = time.time()
            self.__log("    > " + str(ml) + "...")
            try:
                #Var for saving each fold's result
                results[ml] = []
                exp_pred[ml] = []
                models[ml] = []
                nTrainEx[ml] = []
                nTestEx[ml] = []
                optAcc[ml] = []
                logTxt = ""
                for foldN in foldsN:
                    if type(self.learner) == dict:
                        self.paramList = None

                    trainData = self.data.select(DataIdxs, foldN, negate=1)
                    testData = self.data.select(DataIdxs, foldN)
                    smilesAttr = dataUtilities.getSMILESAttr(trainData)
                    if smilesAttr:
                        self.__log("Found SMILES attribute:" + smilesAttr)
                        if MLmethods[ml].specialType == 1:
                            trainData = dataUtilities.attributeSelectionData(
                                trainData,
                                [smilesAttr, trainData.domain.classVar.name])
                            testData = dataUtilities.attributeSelectionData(
                                testData,
                                [smilesAttr, testData.domain.classVar.name])
                            self.__log(
                                "Selected attrs: " +
                                str([attr.name for attr in trainData.domain]))
                        else:
                            trainData = dataUtilities.attributeDeselectionData(
                                trainData, [smilesAttr])
                            testData = dataUtilities.attributeDeselectionData(
                                testData, [smilesAttr])
                            self.__log("Selected attrs: " + str(
                                [attr.name for attr in trainData.domain[0:3]] +
                                ["..."] + [
                                    attr.name for attr in trainData.
                                    domain[len(trainData.domain) - 3:]
                                ]))

                    nTrainEx[ml].append(len(trainData))
                    nTestEx[ml].append(len(testData))
                    #Test if trainsets inside optimizer will respect dataSize criteria.
                    #  if not, don't optimize, but still train the model
                    dontOptimize = False
                    if self.responseType != "Classification" and (
                            len(trainData) *
                        (1 - 1.0 / self.nInnerFolds) < 20):
                        dontOptimize = True
                    else:
                        tmpDataIdxs = self.sampler(trainData, self.nInnerFolds)
                        tmpTrainData = trainData.select(tmpDataIdxs,
                                                        1,
                                                        negate=1)
                        if not self.__checkTrainData(tmpTrainData, False):
                            dontOptimize = True

                    SpecialModel = None
                    if dontOptimize:
                        logTxt += "       Fold " + str(
                            foldN
                        ) + ": Too few compounds to optimize model hyper-parameters\n"
                        self.__log(logTxt)
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            res = evalUtilities.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                stratified=orange.MakeRandomIndices.
                                StratifiedIfPossible,
                                random_generator=random.randint(0, 100))
                            CA = evalUtilities.CA(res)[0]
                            optAcc[ml].append(CA)
                        else:
                            res = evalUtilities.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                stratified=orange.MakeRandomIndices.
                                StratifiedIfPossible,
                                random_generator=random.randint(0, 100))
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                    else:
                        if MLmethods[ml].specialType == 1:
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                optInfo, SpecialModel = MLmethods[
                                    ml].optimizePars(trainData, folds=5)
                                optAcc[ml].append(optInfo["Acc"])
                            else:
                                res = evalUtilities.crossValidation(
                                    [MLmethods[ml]],
                                    trainData,
                                    folds=5,
                                    stratified=orange.MakeRandomIndices.
                                    StratifiedIfPossible,
                                    random_generator=random.randint(0, 100))
                                R2 = evalUtilities.R2(res)[0]
                                optAcc[ml].append(R2)
                        else:
                            runPath = miscUtilities.createScratchDir(
                                baseDir=AZOC.NFS_SCRATCHDIR,
                                desc="AccWOptParam",
                                seed=id(trainData))
                            trainData.save(
                                os.path.join(runPath, "trainData.tab"))
                            tunedPars = paramOptUtilities.getOptParam(
                                learner=MLmethods[ml],
                                trainDataFile=os.path.join(
                                    runPath, "trainData.tab"),
                                paramList=self.paramList,
                                useGrid=False,
                                verbose=self.verbose,
                                queueType=self.queueType,
                                runPath=runPath,
                                nExtFolds=None,
                                nFolds=self.nInnerFolds,
                                logFile=self.logFile,
                                getTunedPars=True,
                                fixedParams=self.fixedParams)
                            if not MLmethods[ml] or not MLmethods[ml].optimized:
                                self.__log(
                                    "       WARNING: GETACCWOPTPARAM: The learner "
                                    + str(ml) + " was not optimized.")
                                self.__log(
                                    "                It will be ignored")
                                #self.__log("                It will be set to default parameters")
                                self.__log(
                                    "                    DEBUG can be done in: "
                                    + runPath)
                                #Set learner back to default
                                #MLmethods[ml] = MLmethods[ml].__class__()
                                raise Exception("The learner " + str(ml) +
                                                " was not optimized.")
                            else:
                                if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                    optAcc[ml].append(tunedPars[0])
                                else:
                                    res = evalUtilities.crossValidation(
                                        [MLmethods[ml]],
                                        trainData,
                                        folds=5,
                                        stratified=orange.MakeRandomIndices.
                                        StratifiedIfPossible,
                                        random_generator=random.randint(
                                            0, 100))
                                    R2 = evalUtilities.R2(res)[0]
                                    optAcc[ml].append(R2)

                                miscUtilities.removeDir(runPath)
                    #Train the model
                    if SpecialModel is not None:
                        model = SpecialModel
                    else:
                        model = MLmethods[ml](trainData)
                    models[ml].append(model)
                    #Test the model
                    if self.responseType == "Classification":
                        results[ml].append(
                            (evalUtilities.getClassificationAccuracy(
                                testData, model),
                             evalUtilities.getConfMat(testData, model)))
                    else:
                        local_exp_pred = []
                        # Predict using bulk-predict
                        predictions = model(testData)
                        # Gather predictions
                        for n, ex in enumerate(testData):
                            local_exp_pred.append(
                                (ex.getclass().value, predictions[n].value))
                        results[ml].append(
                            (evalUtilities.calcRMSE(local_exp_pred),
                             evalUtilities.calcRsqrt(local_exp_pred)))
                        #Save the experimental value and correspondent predicted value
                        exp_pred[ml] += local_exp_pred
                    if callBack:
                        stepsDone += 1
                        if not callBack((100 * stepsDone) / nTotalSteps):
                            return None
                    if callBackWithFoldModel:
                        callBackWithFoldModel(model)

                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    labels=hasattr(self.data.domain.classVar, "values")
                    and list(self.data.domain.classVar.values) or None)
                if self.verbose > 0:
                    print "UnbiasedAccuracyGetter!Results  " + ml + ":\n"
                    pprint(res)
                if not res:
                    raise Exception("No results available!")
                res["runningTime"] = time.time() - startTime
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)
                self.__log("       OK")
            except:
                self.__log("       Learner " + str(ml) +
                           " failed to create/optimize the model!")
                error = str(sys.exc_info()[0]) +" "+\
                            str(sys.exc_info()[1]) +" "+\
                            str(traceback.extract_tb(sys.exc_info()[2]))
                self.__log(error)

                res = self.createStatObj()
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            #We still need to build a consensus model out of the stable models
            #   ONLY if there is more than one stable model!
            #   When only one or no stable models, build a consensus based on all models
            # ALWAYS exclude specialType models (MLmethods[ml].specialType > 0)
            consensusMLs = {}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None and statistics[modelName][
                        "stable"]:
                    consensusMLs[modelName] = copy.deepcopy(
                        statistics[modelName])

            self.__log("Found " + str(len(consensusMLs)) +
                       " stable MLmethods out of " + str(len(statistics)) +
                       " MLmethods.")

            if len(consensusMLs
                   ) <= 1:  # we need more models to build a consensus!
                consensusMLs = {}
                for modelName in statistics:
                    consensusMLs[modelName] = copy.deepcopy(
                        statistics[modelName])

            # Exclude specialType models
            excludeThis = []
            for learnerName in consensusMLs:
                if models[learnerName][0].specialType > 0:
                    excludeThis.append(learnerName)
            for learnerName in excludeThis:
                consensusMLs.pop(learnerName)
                self.__log("    > Excluded special model " + learnerName)
            self.__log("    > Stable modules: " + str(consensusMLs.keys()))

            if len(consensusMLs) >= 2:
                #Var for saving each fold's result
                startTime = time.time()
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log(
                    "Calculating the statistics for a Consensus model based on "
                    + str([ml for ml in consensusMLs]))
                for foldN in range(self.nExtFolds):
                    if self.responseType == "Classification":
                        CLASS0 = str(self.data.domain.classVar.values[0])
                        CLASS1 = str(self.data.domain.classVar.values[1])
                        # exprTest0
                        exprTest0 = "(0"
                        for ml in consensusMLs:
                            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(
                                optAcc[ml][foldN]) + " "
                        exprTest0 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest0 += ", " + ml + " == " + CLASS0 + " "
                        exprTest0 += "]),1)"
                        # exprTest1
                        exprTest1 = "(0"
                        for ml in consensusMLs:
                            exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str(
                                optAcc[ml][foldN]) + " "
                        exprTest1 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest1 += ", " + ml + " == " + CLASS1 + " "
                        exprTest1 += "]),1)"
                        # Expression
                        expression = [
                            exprTest0 + " >= " + exprTest1 + " -> " + CLASS0,
                            " -> " + CLASS1
                        ]
                    else:
                        Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                        expression = "(1 / " + str(Q2sum) + ") * (0"
                        for ml in consensusMLs:
                            expression += " + " + str(
                                optAcc[ml][foldN]) + " * " + ml + " "
                        expression += ")"

                    testData = self.data.select(
                        DataIdxs, foldN + 1)  # fold 0 is for the train Bias!!
                    smilesAttr = dataUtilities.getSMILESAttr(testData)
                    if smilesAttr:
                        self.__log("Found SMILES attribute:" + smilesAttr)
                        testData = dataUtilities.attributeDeselectionData(
                            testData, [smilesAttr])
                        self.__log("Selected attrs: " + str(
                            [attr.name
                             for attr in trainData.domain[0:3]] + ["..."] + [
                                 attr.name for attr in
                                 trainData.domain[len(trainData.domain) - 3:]
                             ]))

                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in consensusMLs:
                        consensusClassifiers[learnerName] = models[
                            learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(
                        classifiers=consensusClassifiers,
                        expression=expression)
                    CnTrainEx.append(model.NTrainEx)
                    #Test the model
                    if self.responseType == "Classification":
                        Cresults.append(
                            (evalUtilities.getClassificationAccuracy(
                                testData, model),
                             evalUtilities.getConfMat(testData, model)))
                    else:
                        local_exp_pred = []
                        # Predict using bulk-predict
                        predictions = model(testData)
                        # Gather predictions
                        for n, ex in enumerate(testData):
                            local_exp_pred.append(
                                (ex.getclass().value, predictions[n].value))
                        Cresults.append(
                            (evalUtilities.calcRMSE(local_exp_pred),
                             evalUtilities.calcRsqrt(local_exp_pred)))
                        #Save the experimental value and correspondent predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(
                    Cresults,
                    Cexp_pred,
                    CnTrainEx,
                    CnTestEx,
                    self.responseType,
                    self.nExtFolds,
                    labels=hasattr(self.data.domain.classVar, "values")
                    and list(self.data.domain.classVar.values) or None)
                res["runningTime"] = time.time() - startTime
                statistics["Consensus"] = copy.deepcopy(res)
                statistics["Consensus"][
                    "IndividualStatistics"] = copy.deepcopy(consensusMLs)
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics

        #By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]