def test_CanPersistClassificationModelProbabilities(self):
    """Test the save/load for a classification model - Using probabilities average"""
    # Arrange: train a consensus classifier from two base learners.
    learners = [AZorngRF.RFLearner(), AZorngCvANN.CvANNLearner()]
    learner = AZorngConsensus.ConsensusLearner(learners=learners)
    classifier = learner(self.irisData)

    # Act: predict with the in-memory model, then persist it.
    # (Removed a leftover debug statement: `print scratchdir`.)
    predictions = []
    for ex in self.irisData:
        predictions.append(classifier(ex))
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    classifier.write(os.path.join(scratchdir, "./CM.model"))

    # Assert: the reloaded model reproduces the original predictions
    # and carries the same domain/imputation/statistics metadata.
    predictionsL = []
    Loaded = AZorngConsensus.Consensusread(
        os.path.join(scratchdir, "./CM.model"))
    for ex in self.irisData:
        predictionsL.append(Loaded(ex))
    self.assertEqual(predictions, predictionsL)
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.irisData))
    miscUtilities.removeDir(scratchdir)
def test_CanPersistClassificationModelMajority(self):
    """Test the save/load for a classification model - Using Majority"""
    # Arrange
    # (The original used bare triple-quoted strings as section markers;
    # those are evaluated no-op statements, not comments — replaced with
    # real comments.)
    learners = self.createTestLearners()
    learner = AZorngConsensus.ConsensusLearner(learners=learners)
    classifier = learner(self.getClassificationTrainingData())

    # Act
    predictions = []
    for ex in self.irisData:
        predictions.append(classifier(ex))
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    classifier.write(os.path.join(scratchdir, "./CM.model"))

    # Assert: reloaded model metadata and predictions match the original.
    predictionsL = []
    Loaded = AZorngConsensus.Consensusread(
        os.path.join(scratchdir, "./CM.model"))
    self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.irisData))
    for ex in self.irisData:
        predictionsL.append(Loaded(ex))
    self.assertEqual(predictions, predictionsL)
    miscUtilities.removeDir(scratchdir)
def test_CreateLogicalExpressionConsensusLearner(self): """ Test creation of logical expression consensus learner """ # Arrange # Construct expression learner/classifier learners = { 'firstLearner': AZorngCvSVM.CvSVMLearner(), 'secondLearner': AZorngCvANN.CvANNLearner(), 'thirdLearner': AZorngRF.RFLearner() } discreteExpression = [ "firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica" ] discreteLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=discreteExpression) discreteClassifier = discreteLearner(self.irisData) verifiedLearner = AZorngCvSVM.CvSVMLearner() verifiedClassifier = verifiedLearner(self.irisData) # Act result = [] verifiedResult = [] for ex in self.irisData: result.append(discreteClassifier(ex)) verifiedResult.append(verifiedClassifier(ex)) # Assert for index, item in enumerate(result): if not result[index].value == verifiedResult[index].value: print "Not equal on index: ", index self.assertEqual(result[index].value, verifiedResult[index].value)
def test_CreateModelWithLearnerDictionary(self):
    """ Test the creation of Consensus Model using dictionary of learners """
    # Arrange: a name -> learner map plus a combining expression.
    namedLearners = {
        'a': AZorngCvSVM.CvSVMLearner(),
        'b': AZorngCvANN.CvANNLearner(),
        'c': AZorngRF.RFLearner()
    }
    expression = "a + b + c"

    # Act
    learner = AZorngConsensus.ConsensusLearner(learners=namedLearners,
                                               expression=expression)

    # Assert: the mapping is stored as given; other attributes keep defaults.
    for key in learner.learners:
        self.assertEqual(learner.learners[key], namedLearners[key])
    self.assertEqual(learner.expression, expression)
    self.assertEqual(learner.name, "Consensus learner")
    self.assertEqual(learner.verbose, 0)
    self.assertEqual(learner.imputeData, None)
    self.assertEqual(learner.NTrainEx, 0)
    self.assertEqual(learner.basicStat, None)
    self.assertEqual(learner.weights, None)
def test_CreateDefaultClassifierUsingPreTrainedRegressionClassifiers(self):
    """ Test the creation of custom Consensus Classifier using pre-trained regression classifiers. """
    # Arrange: train each named base learner up front.
    namedLearners = {
        'a': AZorngCvSVM.CvSVMLearner(),
        'b': AZorngCvANN.CvANNLearner(),
        'c': AZorngRF.RFLearner()
    }
    classifiers = {}
    for name in namedLearners:
        classifiers[name] = namedLearners[name](self.getRegressionTrainingData())
    expression = "a + b + c"

    # Act: build the consensus directly from the trained classifiers.
    classifier = AZorngConsensus.ConsensusClassifier(
        classifiers=classifiers, expression=expression)

    # Assert: classifier was assembled and exposes the expected state.
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(namedLearners))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertNotEqual(classifier.classVar, None)
    self.assertNotEqual(classifier.domain, None)
    self.assertEqual(classifier.expression, expression)
    self.assertNotEqual(classifier.imputeData, None)
    #self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertNotEqual(classifier.varNames, None)
    self.assertEqual(classifier.verbose, 0)
    self.assertEqual(classifier.weights, None)
def test_CreateDefaultClassifierUsingPreTrainedRegressionClassifiersList(self):
    """ Test the creation of default Consensus Classifier using a list of
    pre-trained regression classifiers.

    NOTE(fix): renamed — this method previously had the same name as the
    dictionary-based test above it, so one definition silently shadowed the
    other and only one of the two ever ran. The docstring also said
    "classification classifiers" although the code uses regression data.
    """
    # Arrange: train each base learner on the regression data.
    learners = [
        AZorngCvSVM.CvSVMLearner(),
        AZorngCvANN.CvANNLearner(),
        AZorngRF.RFLearner()
    ]
    classifiers = [l(self.getRegressionTrainingData()) for l in learners]

    # Act: no expression given -> default consensus behaviour.
    classifier = AZorngConsensus.ConsensusClassifier(classifiers=classifiers)

    # Assert
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(learners))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertNotEqual(classifier.classVar, None)
    self.assertNotEqual(classifier.domain, None)
    self.assertEqual(classifier.expression, None)
    self.assertNotEqual(classifier.imputeData, None)
    #self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertNotEqual(classifier.varNames, None)
    self.assertEqual(classifier.verbose, 0)
    self.assertEqual(classifier.weights, None)
def test_CreateCustomClassificationClassifierUsingTrainingData(self):
    """ Test the creation of custom classification Classifier by calling learner with training data. """
    # Arrange
    learners = {
        'a': AZorngCvSVM.CvSVMLearner(),
        'b': AZorngCvANN.CvANNLearner(),
        'c': AZorngRF.RFLearner()
    }
    # FIX: the expression previously referenced "firstLearner", which is not
    # a key of the learners map above ('a', 'b', 'c'); it now refers to an
    # actual learner name so the expression is consistent with the map.
    expression = [
        "a == Iris-setosa -> Iris-setosa",
        "-> Iris-virginica"
    ]
    trainingData = self.getClassificationTrainingData()
    learner = AZorngConsensus.ConsensusLearner(learners=learners,
                                               expression=expression)

    # Act
    classifier = learner(trainingData)

    # Assert: classifier exists and carries the expected state.
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(learners))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertNotEqual(classifier.classVar, None)
    self.assertNotEqual(classifier.domain, None)
    self.assertEqual(classifier.expression, expression)
    self.assertNotEqual(classifier.imputeData, None)
    self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertNotEqual(classifier.varNames, None)
    self.assertEqual(classifier.verbose, 0)
    self.assertEqual(classifier.weights, None)
def test_CreateDefaultClassifierUsingTrainingData(self):
    """ Test the creation of default Classifier by calling learner with training data. """
    # Arrange: a plain list of base learners, no custom expression.
    baseLearners = [
        AZorngCvSVM.CvSVMLearner(),
        AZorngCvANN.CvANNLearner(),
        AZorngRF.RFLearner()
    ]
    trainingData = self.getRegressionTrainingData()
    consensusLearner = AZorngConsensus.ConsensusLearner(learners=baseLearners)

    # Act: training yields a consensus classifier.
    classifier = consensusLearner(trainingData)

    # Assert: classifier exists and carries the expected default state.
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(baseLearners))
    self.assertEqual(classifier.expression, None)
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertEqual(classifier.verbose, 0)
    self.assertNotEqual(classifier.imputeData, None)
    self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertEqual(classifier.weights, None)
def test_SaveLoadCustomRegressionExpression(self): """ Test save/load custom expression using average N regression with object map """ # Arrange learners = { 'firstLearner': AZorngCvSVM.CvSVMLearner(), 'secondLearner': AZorngCvANN.CvANNLearner(), 'thirdLearner': AZorngRF.RFLearner() } # Construct expression learner/classifier regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3" expressionLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=regressionExpression) expressionClassifier = expressionLearner(self.DataReg) # Construct default learner/classifier result = [] for ex in self.DataReg: result.append(expressionClassifier(ex)) # Act scratchdir = miscUtilities.createScratchDir( desc="ConsensusSaveLoadTest") expressionClassifier.write(os.path.join(scratchdir, "./CM.model")) resultLoaded = [] loaded = AZorngConsensus.Consensusread( os.path.join(scratchdir, "./CM.model")) self.assertNotEqual(loaded, None) for ex in self.DataReg: resultLoaded.append(loaded(ex)) # Assert for index, item in enumerate(result): if not float_compare(result[index].value, resultLoaded[index].value): print "Not equal on index: ", index self.assertEqual( float_compare(result[index].value, resultLoaded[index].value), True) self.assertEqual(len(loaded.domain), len(self.DataReg.domain)) self.assertEqual(len(loaded.imputeData), len(loaded.domain)) self.assertEqual(len(loaded.basicStat), len(loaded.domain)) self.assertEqual(loaded.NTrainEx, len(self.DataReg)) miscUtilities.removeDir(scratchdir)
def test_SaveLoadCustomLogicalExpression(self): """ Test save/load functionality with a custom logical expression """ # Arrange # Construct expression learner/classifier learners = { 'firstLearner': AZorngCvSVM.CvSVMLearner(), 'secondLearner': AZorngCvANN.CvANNLearner(), 'thirdLearner': AZorngRF.RFLearner() } discreteExpression = [ "firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica" ] discreteLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=discreteExpression) discreteClassifier = discreteLearner(self.irisData) result = [] for ex in self.irisData: result.append(discreteClassifier(ex)) # Act scratchdir = miscUtilities.createScratchDir( desc="ConsensusSaveLoadTest") discreteClassifier.write(os.path.join(scratchdir, "./CM.model")) resultLoaded = [] loaded = AZorngConsensus.Consensusread( os.path.join(scratchdir, "./CM.model")) self.assertNotEqual(loaded, None) for ex in self.irisData: resultLoaded.append(loaded(ex)) # Assert for index, item in enumerate(result): if not result[index].value == resultLoaded[index].value: print "Not equal on index: ", index self.assertEqual(result[index].value, resultLoaded[index].value) self.assertEqual(len(loaded.domain), len(self.irisData.domain)) self.assertEqual(len(loaded.imputeData), len(loaded.domain)) self.assertEqual(len(loaded.basicStat), len(loaded.domain)) self.assertEqual(loaded.NTrainEx, len(self.irisData)) miscUtilities.removeDir(scratchdir)
def test_CanPersistRegressionModelUsingClassifiers(self):
    """Test the save/load for a regression model - Using average of N classifiers"""
    # Arrange: default (expression-less) consensus over three learners.
    baseLearners = [
        AZorngRF.RFLearner(),
        AZorngCvSVM.CvSVMLearner(),
        AZorngCvANN.CvANNLearner()
    ]
    consensusLearner = AZorngConsensus.ConsensusLearner(learners=baseLearners)
    classifier = consensusLearner(self.DataReg)

    # Act: predict, then persist the model.
    predictions = [classifier(ex) for ex in self.DataReg]
    scratchdir = miscUtilities.createScratchDir(desc="ConsensusSaveLoadTest")
    classifier.write(os.path.join(scratchdir, "./CM.model"))

    # Assert: reloaded predictions agree to 4 decimal places.
    Loaded = AZorngConsensus.Consensusread(
        os.path.join(scratchdir, "./CM.model"))
    predictionsL = [Loaded(ex) for ex in self.DataReg]
    self.assertEqual(
        [round(pred.value, 4) for pred in predictions],
        [round(pred.value, 4) for pred in predictionsL],
        "Loaded model predictions differ: Pred. 1 (saved/loaded):" +
        str(predictions[0]) + " / " + str(predictionsL[0]))
    self.assertEqual(len(Loaded.domain), len(self.DataReg.domain))
    self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
    self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
    self.assertEqual(Loaded.NTrainEx, len(self.DataReg))
    miscUtilities.removeDir(scratchdir)
def test_AverageNRegressionExpressionUsingObjMap(self):
    """ Test regular expression using average N regression with object map """
    # Arrange: expression-based consensus over named learners...
    namedLearners = {
        'firstLearner': AZorngCvSVM.CvSVMLearner(),
        'secondLearner': AZorngCvANN.CvANNLearner(),
        'thirdLearner': AZorngRF.RFLearner()
    }
    regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3"
    expressionLearner = AZorngConsensus.ConsensusLearner(
        learners=namedLearners, expression=regressionExpression)
    expressionClassifier = expressionLearner(self.DataReg)
    # ...and the default consensus over an equivalent list of learners.
    defaultLearners = [
        AZorngRF.RFLearner(),
        AZorngCvANN.CvANNLearner(),
        AZorngCvSVM.CvSVMLearner()
    ]
    defaultLearner = AZorngConsensus.ConsensusLearner(learners=defaultLearners)
    defaultClassifier = defaultLearner(self.DataReg)

    # Act
    expressionPredictions = [expressionClassifier(ex) for ex in self.DataReg]
    defaultPredictions = [defaultClassifier(ex) for ex in self.DataReg]

    # Assert: both consensus flavours agree example by example.
    for exprPred, defPred in zip(expressionPredictions, defaultPredictions):
        self.assertEqual(True, float_compare(exprPred, defPred))
def test_CreateLearnerWithObjectMapping(self):
    """ Test the creation of learners with an object map """
    # Arrange: map learner names to learner instances.
    namedLearners = {
        'firstLearner': AZorngCvSVM.CvSVMLearner(),
        'secondLearner': AZorngCvANN.CvANNLearner(),
        'thirdLearner': AZorngRF.RFLearner()
    }

    # Act
    consensusLearner = AZorngConsensus.ConsensusLearner(learners=namedLearners)

    # Assert: the whole map was stored.
    self.assertEqual(len(consensusLearner.learners), len(namedLearners))
def test_CustomRegressionExpressionUsingWeights(self): """ Test regression expression using weights """ # Arrange learners = { 'a': AZorngCvSVM.CvSVMLearner(), 'b': AZorngCvANN.CvANNLearner(), 'c': AZorngRF.RFLearner() } weights = {'a': lambda x: 1, 'b': lambda x: 2, 'c': lambda x: 3} regressionExpression = "(a + b + c) / 3" expressionLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=regressionExpression, weights=weights) classifier = expressionLearner(self.DataReg) # Act result = [] for ex in self.DataReg: result.append(classifier(ex)) verifiedResult = [] for ex in self.DataReg: a_value = classifier.classifiers['a'](ex) a_weight_value = weights['a'](a_value) b_value = classifier.classifiers['b'](ex) b_weight_value = weights['b'](b_value) c_value = classifier.classifiers['c'](ex) c_weight_value = weights['c'](c_value) prediction = (a_value * a_weight_value + b_value * b_weight_value + c_value * c_weight_value) / 3 verifiedResult.append(prediction) # Assert for index, item in enumerate(result): if float_compare(result[index].value, verifiedResult[index]) == False: print "Not equal on index: ", index print "Result: ", result[ index].value, " Verified: ", verifiedResult[index] print "Delta: ", abs(result[index].value - verifiedResult[index]) self.assertEqual( float_compare(result[index].value, verifiedResult[index]), True)
def test_CreateLearnerWithObjectMappingWithoutExpression(self):
    """ Test with name variable mapping defined but not expression given """
    # Arrange: a name -> learner map but no combining expression.
    namedLearners = {
        'firstLearner': AZorngCvSVM.CvSVMLearner(),
        'secondLearner': AZorngCvANN.CvANNLearner(),
        'thirdLearner': AZorngRF.RFLearner()
    }
    consensusLearner = AZorngConsensus.ConsensusLearner(learners=namedLearners)

    # Act
    classifier = consensusLearner(self.DataReg)

    # Assert: training is expected to return None in this configuration.
    self.assertEqual(classifier, None)
def test_CreateDefaultModel(self):
    """ Test the creation of Consensus Model using no learners """
    # Arrange / Act: construct the learner with no arguments at all.
    learner = AZorngConsensus.ConsensusLearner()

    # Assert: every attribute carries its expected default value.
    expectedDefaults = [
        ('learners', None),
        ('expression', None),
        ('name', "Consensus learner"),
        ('verbose', 0),
        ('imputeData', None),
        ('NTrainEx', 0),
        ('basicStat', None),
        ('weights', None),
    ]
    for attrName, expected in expectedDefaults:
        self.assertEqual(getattr(learner, attrName), expected)
def test_CustomLogicalExpressionUsingOrAndStatement(self): """ Test logical expression using OR/AND statements """ # Arrange # Construct verification learners a = AZorngCvSVM.CvSVMLearner() a = a(self.irisData) b = AZorngCvANN.CvANNLearner() b = b(self.irisData) c = AZorngRF.RFLearner() c = c(self.irisData) # Construct expression learner/classifier learners = { 'a': AZorngCvSVM.CvSVMLearner(), 'b': AZorngCvANN.CvANNLearner(), 'c': AZorngRF.RFLearner() } discreteExpression = [ "a == Iris-setosa and c == Iris-virginica or b == Iris-setosa -> Iris-setosa", "-> Iris-virginica" ] discreteLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=discreteExpression) discreteClassifier = discreteLearner(self.irisData) # Act result = [] for ex in self.irisData: result.append(discreteClassifier(ex)) verifiedResult = [] for ex in self.irisData: if a(ex).value == "Iris-setosa" and c( ex).value == "Iris-virginica" or b( ex).value == "Iris-setosa": verifiedResult.append("Iris-setosa") else: verifiedResult.append("Iris-virginica") # Assert for index, item in enumerate(result): if not result[index].value == verifiedResult[index]: print "Not equal on index: ", index, " Predicted: ", result[ index].value, " Real: ", verifiedResult[index] self.assertEqual(result[index].value, verifiedResult[index])
def test_InvalidCustomRegressionExpression(self):
    """ Test invalid custom expression """
    # Arrange: "3cd45" is not a valid identifier inside the expression.
    namedLearners = {
        'a': AZorngCvSVM.CvSVMLearner(),
        'b': AZorngCvANN.CvANNLearner(),
        'c': AZorngRF.RFLearner()
    }
    badExpression = "(a + b + 3cd45 + c) / 3"
    expressionLearner = AZorngConsensus.ConsensusLearner(
        learners=namedLearners, expression=badExpression)

    # Act
    classifier = expressionLearner(self.DataReg)

    # Assert: predicting through a broken expression yields None.
    self.assertEqual(classifier(self.DataReg[0]), None)
def test_InvalidCustomClassificationExpression(self):
    """ Test invalid custom expression """
    # Arrange
    learners = {
        'a': AZorngCvSVM.CvSVMLearner(),
        'b': AZorngCvANN.CvANNLearner(),
        'c': AZorngRF.RFLearner()
    }
    # "and or" makes the first rule syntactically invalid on purpose.
    discreteExpression = [
        "a == Iris-setosa and or c == Iris-virginica or b == Iris-setosa -> Iris-setosa",
        "-> Iris-virginica"
    ]
    expressionLearner = AZorngConsensus.ConsensusLearner(
        learners=learners, expression=discreteExpression)
    # FIX: fetch the training data once. The original called
    # getClassificationTrainingData() twice — training on one data set and
    # then predicting on an example taken from a second, freshly built one.
    trainingData = self.getClassificationTrainingData()

    # Act
    classifier = expressionLearner(trainingData)

    # Assert: predicting with an invalid expression must yield None.
    self.assertEqual(classifier(trainingData[0]), None)
def test_CanCreateClassifierUsingObjMapping(self):
    """ Test with name variable mapping defined but not expression given """
    # Arrange
    # (Removed an unused local from the original: discreteExpression = "".)
    learners = {
        'firstLearner': AZorngCvSVM.CvSVMLearner(),
        'secondLearner': AZorngCvANN.CvANNLearner(),
        'thirdLearner': AZorngRF.RFLearner()
    }
    regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 2"
    learner = AZorngConsensus.ConsensusLearner(
        learners=learners, expression=regressionExpression)

    # Act
    classifier = learner(self.DataReg)

    # Assert: all three learners were trained and the expression kept.
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), 3)
    self.assertEqual(classifier.expression, regressionExpression)
def test_CreateModelWithLearnerList(self):
    """ Test the creation of Consensus Model using list of learners """
    # Arrange
    baseLearners = [
        AZorngCvSVM.CvSVMLearner(),
        AZorngCvANN.CvANNLearner(),
        AZorngRF.RFLearner()
    ]

    # Act
    learner = AZorngConsensus.ConsensusLearner(learners=baseLearners)

    # Assert: learners are stored in order; other attributes keep defaults.
    for stored, supplied in zip(learner.learners, baseLearners):
        self.assertEqual(stored, supplied)
    self.assertEqual(learner.expression, None)
    self.assertEqual(learner.name, "Consensus learner")
    self.assertEqual(learner.verbose, 0)
    self.assertEqual(learner.imputeData, None)
    self.assertEqual(learner.NTrainEx, 0)
    self.assertEqual(learner.basicStat, None)
    self.assertEqual(learner.weights, None)
def modelRead(modelFile=None, verbose=0, retrunClassifier=True):
    """Get the type of model saved in 'modelPath' and loads the respective model.

    Returns the Classifier saved in the respective model path.
    If called without parameters, it returns a tuple of known classifier types.
    It can return the classifier, or just a string with the Type.
        modelRead (modelFile [, verbose = 0] [, retrunClassifier = True] )

    NOTE: the parameter name 'retrunClassifier' is misspelled but kept
    as-is for backward compatibility with callers using it as a keyword.
    """
    if not modelFile:
        return ("SignSVM", "CvSVM", "CvANN", "PLS", "CvRF", "CvBoost",
                "CvBayes", "Consensus")

    modelType = None
    loadedModel = None
    # Each model type is identified by a marker file/dir inside modelFile;
    # the matching reader module is imported lazily only when needed.
    if os.path.isfile(os.path.join(modelFile, "model.svm")):
        modelType = "CvSVM"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvSVM
        loadedModel = AZorngCvSVM.CvSVMread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "model.SignSvm")):
        modelType = "SignSVM"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngSignSVM
        loadedModel = AZorngSignSVM.SignSVMread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.ann")):
        modelType = "CvANN"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvANN
        loadedModel = AZorngCvANN.CvANNread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "Model.pls")):
        modelType = "PLS"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngPLS
        loadedModel = AZorngPLS.PLSread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.rf")):
        modelType = "RF"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngRF
        loadedModel = AZorngRF.RFread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "C0.model")):
        modelType = "Consensus"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngConsensus
        loadedModel = AZorngConsensus.Consensusread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.boost")):
        modelType = "CvBoost"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvBoost
        loadedModel = AZorngCvBoost.CvBoostread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.bayes")):
        modelType = "CvBayes"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvBayes
        loadedModel = AZorngCvBayes.CvBayesread(modelFile, verbose)
    else:
        # Assuming an RF old format for backcompatibility.
        # FIX: was a bare 'except:', which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception (behavior otherwise kept:
        # any read failure yields None).
        try:
            if os.path.isdir(modelFile):
                modelType = "RF"
                if not retrunClassifier:
                    return modelType
                from trainingMethods import AZorngRF
                loadedModel = AZorngRF.RFread(modelFile, verbose)
            else:
                modelType = None
                loadedModel = None
        except Exception:
            modelType = None
            loadedModel = None
    return loadedModel
def buildConsensus(trainData, learners, MLMethods, logFile=None):
    """Build and train a Consensus model from already-optimized ML methods.

    trainData -- training table; its classVar type selects the expression kind
    learners  -- dict {learnerName: learner} of base learners
    MLMethods -- dict {learnerName: {"optAcc": ...}} with each method's
                 optimization accuracy estimate, used as the vote weight
    logFile   -- optional log sink passed to log()

    Returns the classifier produced by training the consensus learner.
    """
    log(
        logFile, "Building a consensus model based on optimized MLmethods: " +
        str([ml for ml in MLMethods]) + "...")
    if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
        #Expression: If CAavg_{POS} ge CAavg_{NEG} -> POS else -> NEG
        # where CAavg_{POS} is the average of classification accuracies of all models predicting POS.
        CLASS0 = str(trainData.domain.classVar.values[0])
        CLASS1 = str(trainData.domain.classVar.values[1])
        #exprTest0
        # Accuracy-weighted vote for CLASS0, divided by the number of
        # methods that predicted CLASS0 (IF0 guards the zero-vote case).
        exprTest0 = "(0"
        for ml in MLMethods:
            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(
                MLMethods[ml]["optAcc"]) + " "
        exprTest0 += ")/IF0(sum([False"
        for ml in MLMethods:
            exprTest0 += ", " + ml + " == " + CLASS0 + " "
        exprTest0 += "]),1)"
        # exprTest1 -- same weighted-vote average, for CLASS1
        exprTest1 = "(0"
        for ml in MLMethods:
            exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str(
                MLMethods[ml]["optAcc"]) + " "
        exprTest1 += ")/IF0(sum([False"
        for ml in MLMethods:
            exprTest1 += ", " + ml + " == " + CLASS1 + " "
        exprTest1 += "]),1)"
        # expression: pick CLASS0 when its average vote wins, else CLASS1
        expression = [
            exprTest0 + " >= " + exprTest1 + " -> " + CLASS0,
            " -> " + CLASS1
        ]
    else:
        # Regression: optAcc-weighted average of the individual predictions,
        # normalized by the sum of the weights.
        Q2sum = sum([MLMethods[ml]["optAcc"] for ml in MLMethods])
        expression = "(1 / " + str(Q2sum) + ") * (0"
        for ml in MLMethods:
            expression += " + " + str(
                MLMethods[ml]["optAcc"]) + " * " + ml + " "
        expression += ")"
    # Shallow copy of the learners map for the consensus learner.
    consensusLearners = {}
    for learnerName in learners:
        consensusLearners[learnerName] = learners[learnerName]
    learner = AZorngConsensus.ConsensusLearner(learners=consensusLearners,
                                               expression=expression)
    log(logFile, " Training Consensus Learner")
    # If the data carries a SMILES attribute, adjust the attribute set:
    # specialType == 1 learners keep only SMILES + class (they appear to
    # consume SMILES directly — TODO confirm); all others drop SMILES.
    smilesAttr = dataUtilities.getSMILESAttr(trainData)
    if smilesAttr:
        log(logFile, "Found SMILES attribute:" + smilesAttr)
        if learner.specialType == 1:
            trainData = dataUtilities.attributeSelectionData(
                trainData, [smilesAttr, trainData.domain.classVar.name])
            log(
                logFile, "Selected attrs: " +
                str([attr.name for attr in trainData.domain]))
        else:
            trainData = dataUtilities.attributeDeselectionData(
                trainData, [smilesAttr])
            log(logFile,"Selected attrs: "+str([attr.name for attr in trainData.domain[0:3]] + ["..."] +\
                [attr.name for attr in trainData.domain[len(trainData.domain)-3:]]))
    return learner(trainData)
def getAcc(self, callBack=None, callBackWithFoldModel=None): """ For regression problems, it returns the RMSE and the Q2 For Classification problems, it returns CA and the ConfMat The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]} For the EvalResults not supported for a specific learner/datase, the respective result will be None if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus made out of those that were stable It some error occurred, the respective values in the Dict will be None """ self.__log("Starting Calculating MLStatistics") statistics = {} if not self.__areInputsOK(): return None # Set the response type self.responseType = self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression" self.__log(" " + str(self.responseType)) #Create the Train and test sets if self.usePreDefFolds: DataIdxs = self.preDefIndices else: DataIdxs = self.sampler(self.data, self.nExtFolds) foldsN = [f for f in dict.fromkeys(DataIdxs) if f != 0 ] #Folds used only from 1 on ... 
0 are for fixed train Bias nFolds = len(foldsN) #Fix the Indexes based on DataIdxs # (0s) represents the train set ( >= 1s) represents the test set folds if self.useVarCtrlCV: nShifted = [0] * nFolds for idx, isTest in enumerate( self.preDefIndices ): # self.preDefIndices == 0 are to be used in TrainBias if not isTest: if DataIdxs[idx]: nShifted[DataIdxs[idx]] += 1 DataIdxs[idx] = 0 for idx, shift in enumerate(nShifted): self.__log("In fold " + str(idx) + ", " + str(shift) + " examples were shifted to the train set.") #Var for saving each Fols result optAcc = {} results = {} exp_pred = {} nTrainEx = {} nTestEx = {} #Set a dict of learners MLmethods = {} if type(self.learner) == dict: for ml in self.learner: MLmethods[ml] = self.learner[ml] else: MLmethods[self.learner.name] = self.learner models = {} self.__log("Calculating Statistics for MLmethods:") self.__log(" " + str([x for x in MLmethods])) #Check data in advance so that, by chance, it will not faill at the last fold! for foldN in foldsN: trainData = self.data.select(DataIdxs, foldN, negate=1) self.__checkTrainData(trainData) #Optional!! 
# Order Learners so that PLS is the first sortedML = [ml for ml in MLmethods] if "PLS" in sortedML: sortedML.remove("PLS") sortedML.insert(0, "PLS") stepsDone = 0 nTotalSteps = len(sortedML) * self.nExtFolds for ml in sortedML: startTime = time.time() self.__log(" > " + str(ml) + "...") try: #Var for saving each Fols result results[ml] = [] exp_pred[ml] = [] models[ml] = [] nTrainEx[ml] = [] nTestEx[ml] = [] optAcc[ml] = [] logTxt = "" for foldN in foldsN: if type(self.learner) == dict: self.paramList = None trainData = self.data.select(DataIdxs, foldN, negate=1) testData = self.data.select(DataIdxs, foldN) smilesAttr = dataUtilities.getSMILESAttr(trainData) if smilesAttr: self.__log("Found SMILES attribute:" + smilesAttr) if MLmethods[ml].specialType == 1: trainData = dataUtilities.attributeSelectionData( trainData, [smilesAttr, trainData.domain.classVar.name]) testData = dataUtilities.attributeSelectionData( testData, [smilesAttr, testData.domain.classVar.name]) self.__log( "Selected attrs: " + str([attr.name for attr in trainData.domain])) else: trainData = dataUtilities.attributeDeselectionData( trainData, [smilesAttr]) testData = dataUtilities.attributeDeselectionData( testData, [smilesAttr]) self.__log("Selected attrs: " + str( [attr.name for attr in trainData.domain[0:3]] + ["..."] + [ attr.name for attr in trainData. domain[len(trainData.domain) - 3:] ])) nTrainEx[ml].append(len(trainData)) nTestEx[ml].append(len(testData)) #Test if trainsets inside optimizer will respect dataSize criterias. 
# if not, don't optimize, but still train the model dontOptimize = False if self.responseType != "Classification" and ( len(trainData) * (1 - 1.0 / self.nInnerFolds) < 20): dontOptimize = True else: tmpDataIdxs = self.sampler(trainData, self.nInnerFolds) tmpTrainData = trainData.select(tmpDataIdxs, 1, negate=1) if not self.__checkTrainData(tmpTrainData, False): dontOptimize = True SpecialModel = None if dontOptimize: logTxt += " Fold " + str( foldN ) + ": Too few compounds to optimize model hyper-parameters\n" self.__log(logTxt) if trainData.domain.classVar.varType == orange.VarTypes.Discrete: res = evalUtilities.crossValidation( [MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices. StratifiedIfPossible, random_generator=random.randint(0, 100)) CA = evalUtilities.CA(res)[0] optAcc[ml].append(CA) else: res = evalUtilities.crossValidation( [MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices. StratifiedIfPossible, random_generator=random.randint(0, 100)) R2 = evalUtilities.R2(res)[0] optAcc[ml].append(R2) else: if MLmethods[ml].specialType == 1: if trainData.domain.classVar.varType == orange.VarTypes.Discrete: optInfo, SpecialModel = MLmethods[ ml].optimizePars(trainData, folds=5) optAcc[ml].append(optInfo["Acc"]) else: res = evalUtilities.crossValidation( [MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices. 
StratifiedIfPossible, random_generator=random.randint(0, 100)) R2 = evalUtilities.R2(res)[0] optAcc[ml].append(R2) else: runPath = miscUtilities.createScratchDir( baseDir=AZOC.NFS_SCRATCHDIR, desc="AccWOptParam", seed=id(trainData)) trainData.save( os.path.join(runPath, "trainData.tab")) tunedPars = paramOptUtilities.getOptParam( learner=MLmethods[ml], trainDataFile=os.path.join( runPath, "trainData.tab"), paramList=self.paramList, useGrid=False, verbose=self.verbose, queueType=self.queueType, runPath=runPath, nExtFolds=None, nFolds=self.nInnerFolds, logFile=self.logFile, getTunedPars=True, fixedParams=self.fixedParams) if not MLmethods[ml] or not MLmethods[ml].optimized: self.__log( " WARNING: GETACCWOPTPARAM: The learner " + str(ml) + " was not optimized.") self.__log( " It will be ignored") #self.__log(" It will be set to default parameters") self.__log( " DEBUG can be done in: " + runPath) #Set learner back to default #MLmethods[ml] = MLmethods[ml].__class__() raise Exception("The learner " + str(ml) + " was not optimized.") else: if trainData.domain.classVar.varType == orange.VarTypes.Discrete: optAcc[ml].append(tunedPars[0]) else: res = evalUtilities.crossValidation( [MLmethods[ml]], trainData, folds=5, stratified=orange.MakeRandomIndices. 
StratifiedIfPossible, random_generator=random.randint( 0, 100)) R2 = evalUtilities.R2(res)[0] optAcc[ml].append(R2) miscUtilities.removeDir(runPath) #Train the model if SpecialModel is not None: model = SpecialModel else: model = MLmethods[ml](trainData) models[ml].append(model) #Test the model if self.responseType == "Classification": results[ml].append( (evalUtilities.getClassificationAccuracy( testData, model), evalUtilities.getConfMat(testData, model))) else: local_exp_pred = [] # Predict using bulk-predict predictions = model(testData) # Gather predictions for n, ex in enumerate(testData): local_exp_pred.append( (ex.getclass().value, predictions[n].value)) results[ml].append( (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))) #Save the experimental value and correspondent predicted value exp_pred[ml] += local_exp_pred if callBack: stepsDone += 1 if not callBack((100 * stepsDone) / nTotalSteps): return None if callBackWithFoldModel: callBackWithFoldModel(model) res = self.createStatObj( results[ml], exp_pred[ml], nTrainEx[ml], nTestEx[ml], self.responseType, self.nExtFolds, logTxt, labels=hasattr(self.data.domain.classVar, "values") and list(self.data.domain.classVar.values) or None) if self.verbose > 0: print "UnbiasedAccuracyGetter!Results " + ml + ":\n" pprint(res) if not res: raise Exception("No results available!") res["runningTime"] = time.time() - startTime statistics[ml] = copy.deepcopy(res) self.__writeResults(statistics) self.__log(" OK") except: self.__log(" Learner " + str(ml) + " failed to create/optimize the model!") error = str(sys.exc_info()[0]) +" "+\ str(sys.exc_info()[1]) +" "+\ str(traceback.extract_tb(sys.exc_info()[2])) self.__log(error) res = self.createStatObj() statistics[ml] = copy.deepcopy(res) self.__writeResults(statistics) if not statistics or len(statistics) < 1: self.__log("ERROR: No statistics to return!") return None elif len(statistics) > 1: #We still need to build a consensus model out of the 
stable models # ONLY if there are more that one model stable! # When only one or no stable models, build a consensus based on all models # ALWAYS exclude specialType models (MLmethods[ml].specialType > 0) consensusMLs = {} for modelName in statistics: StabilityValue = statistics[modelName]["StabilityValue"] if StabilityValue is not None and statistics[modelName][ "stable"]: consensusMLs[modelName] = copy.deepcopy( statistics[modelName]) self.__log("Found " + str(len(consensusMLs)) + " stable MLmethods out of " + str(len(statistics)) + " MLmethods.") if len(consensusMLs ) <= 1: # we need more models to build a consensus! consensusMLs = {} for modelName in statistics: consensusMLs[modelName] = copy.deepcopy( statistics[modelName]) # Exclude specialType models excludeThis = [] for learnerName in consensusMLs: if models[learnerName][0].specialType > 0: excludeThis.append(learnerName) for learnerName in excludeThis: consensusMLs.pop(learnerName) self.__log(" > Excluded special model " + learnerName) self.__log(" > Stable modules: " + str(consensusMLs.keys())) if len(consensusMLs) >= 2: #Var for saving each Fols result startTime = time.time() Cresults = [] Cexp_pred = [] CnTrainEx = [] CnTestEx = [] self.__log( "Calculating the statistics for a Consensus model based on " + str([ml for ml in consensusMLs])) for foldN in range(self.nExtFolds): if self.responseType == "Classification": CLASS0 = str(self.data.domain.classVar.values[0]) CLASS1 = str(self.data.domain.classVar.values[1]) # exprTest0 exprTest0 = "(0" for ml in consensusMLs: exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str( optAcc[ml][foldN]) + " " exprTest0 += ")/IF0(sum([False" for ml in consensusMLs: exprTest0 += ", " + ml + " == " + CLASS0 + " " exprTest0 += "]),1)" # exprTest1 exprTest1 = "(0" for ml in consensusMLs: exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str( optAcc[ml][foldN]) + " " exprTest1 += ")/IF0(sum([False" for ml in consensusMLs: exprTest1 += ", " + ml + " == " + CLASS1 + " " 
exprTest1 += "]),1)" # Expression expression = [ exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1 ] else: Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs]) expression = "(1 / " + str(Q2sum) + ") * (0" for ml in consensusMLs: expression += " + " + str( optAcc[ml][foldN]) + " * " + ml + " " expression += ")" testData = self.data.select( DataIdxs, foldN + 1) # fold 0 if for the train Bias!! smilesAttr = dataUtilities.getSMILESAttr(testData) if smilesAttr: self.__log("Found SMILES attribute:" + smilesAttr) testData = dataUtilities.attributeDeselectionData( testData, [smilesAttr]) self.__log("Selected attrs: " + str( [attr.name for attr in trainData.domain[0:3]] + ["..."] + [ attr.name for attr in trainData.domain[len(trainData.domain) - 3:] ])) CnTestEx.append(len(testData)) consensusClassifiers = {} for learnerName in consensusMLs: consensusClassifiers[learnerName] = models[ learnerName][foldN] model = AZorngConsensus.ConsensusClassifier( classifiers=consensusClassifiers, expression=expression) CnTrainEx.append(model.NTrainEx) #Test the model if self.responseType == "Classification": Cresults.append( (evalUtilities.getClassificationAccuracy( testData, model), evalUtilities.getConfMat(testData, model))) else: local_exp_pred = [] # Predict using bulk-predict predictions = model(testData) # Gather predictions for n, ex in enumerate(testData): local_exp_pred.append( (ex.getclass().value, predictions[n].value)) Cresults.append( (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))) #Save the experimental value and correspondent predicted value Cexp_pred += local_exp_pred res = self.createStatObj( Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds, labels=hasattr(self.data.domain.classVar, "values") and list(self.data.domain.classVar.values) or None) res["runningTime"] = time.time() - startTime statistics["Consensus"] = copy.deepcopy(res) statistics["Consensus"][ "IndividualStatistics"] = 
copy.deepcopy(consensusMLs) self.__writeResults(statistics) self.__log("Returned multiple ML methods statistics.") return statistics #By default return the only existing statistics! self.__writeResults(statistics) self.__log("Returned only one ML method statistics.") return statistics[statistics.keys()[0]]