    def test_CanPersistClassificationModelProbabilities(self):
        """Test the save/load for a classification model - Using probabilities average"""

        # Arrange
        learners = [AZorngRF.RFLearner(), AZorngCvANN.CvANNLearner()]
        learner = AZorngConsensus.ConsensusLearner(learners=learners)
        classifier = learner(self.irisData)

        # Act
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        print scratchdir
        classifier.write(os.path.join(scratchdir, "./CM.model"))

        # Assert
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions, predictionsL)
        self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
    def test_CanPersistClassificationModelMajority(self):
        """Test the save/load for a classification model - Using Majority"""
        """ Arrange """
        learners = self.createTestLearners()
        learner = AZorngConsensus.ConsensusLearner(learners=learners)
        classifier = learner(self.getClassificationTrainingData())
        """ Act """
        predictions = []
        for ex in self.irisData:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir, "./CM.model"))
        """ Assert """
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        self.assertEqual(len(Loaded.domain), len(self.irisData.domain))
        self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.irisData))
        for ex in self.irisData:
            predictionsL.append(Loaded(ex))

        self.assertEqual(predictions, predictionsL)

        miscUtilities.removeDir(scratchdir)
    def test_CreateLogicalExpressionConsensusLearner(self):
        """ Test creation of logical expression consensus learner """
        # Arrange

        # Construct expression learner/classifier
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }
        discreteExpression = [
            "firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica"
        ]
        discreteLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=discreteExpression)
        discreteClassifier = discreteLearner(self.irisData)

        verifiedLearner = AZorngCvSVM.CvSVMLearner()
        verifiedClassifier = verifiedLearner(self.irisData)

        # Act
        result = []
        verifiedResult = []
        for ex in self.irisData:
            result.append(discreteClassifier(ex))
            verifiedResult.append(verifiedClassifier(ex))

        # Assert
        for index, item in enumerate(result):
            if not result[index].value == verifiedResult[index].value:
                print "Not equal on index: ", index
            self.assertEqual(result[index].value, verifiedResult[index].value)
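    # Note on the discrete expression format exercised above and in the tests
    # that follow: each entry appears to be a rule "<condition> -> <class>",
    # with a bare "-> <class>" entry acting as the default when no condition
    # matches. A minimal sketch (the learner name 'rf' is hypothetical):
    #
    #   discreteExpression = [
    #       "rf == Iris-setosa -> Iris-setosa",  # rule: condition -> class
    #       "-> Iris-virginica"                  # fallback class
    #   ]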
    def test_CreateModelWithLearnerDictionary(self):
        """ Test the creation of Consensus Model using dictionary of learners """

        # Arrange
        learners = {
            'a': AZorngCvSVM.CvSVMLearner(),
            'b': AZorngCvANN.CvANNLearner(),
            'c': AZorngRF.RFLearner()
        }
        expression = "a + b + c"

        # Act
        learner = AZorngConsensus.ConsensusLearner(learners=learners,
                                                   expression=expression)

        # Assert
        for k, v in learner.learners.items():
            self.assertEqual(learner.learners[k], learners[k])
        self.assertEqual(learner.expression, expression)
        self.assertEqual(learner.name, "Consensus learner")
        self.assertEqual(learner.verbose, 0)
        self.assertEqual(learner.imputeData, None)
        self.assertEqual(learner.NTrainEx, 0)
        self.assertEqual(learner.basicStat, None)
        self.assertEqual(learner.weights, None)
    def test_CreateCustomClassifierUsingPreTrainedRegressionClassifiers(self):
        """ Test the creation of custom Consensus Classifier using pre-trained regression classifiers. """

        # Arrange
        learners = {
            'a': AZorngCvSVM.CvSVMLearner(),
            'b': AZorngCvANN.CvANNLearner(),
            'c': AZorngRF.RFLearner()
        }
        classifiers = {}
        for k, v in learners.items():
            classifiers[k] = v(self.getRegressionTrainingData())
        expression = "a + b + c"

        # Act
        classifier = AZorngConsensus.ConsensusClassifier(
            classifiers=classifiers, expression=expression)

        # Assert
        self.assertNotEqual(classifier, None)
        self.assertEqual(len(classifier.classifiers), len(learners))
        self.assertNotEqual(classifier.basicStat, None)
        self.assertNotEqual(classifier.classVar, None)
        self.assertNotEqual(classifier.domain, None)
        self.assertEqual(classifier.expression, expression)
        self.assertNotEqual(classifier.imputeData, None)
        #self.assertEqual(classifier.NTrainEx, len(trainingData))
        self.assertEqual(classifier.name, "Consensus classifier")
        self.assertNotEqual(classifier.varNames, None)
        self.assertEqual(classifier.verbose, 0)
        self.assertEqual(classifier.weights, None)
    def test_CreateDefaultClassifierUsingPreTrainedRegressionClassifiers(self):
        """ Test the creation of default Consensus Classifier using pre-trained classification classifiers. """

        # Arrange
        learners = [
            AZorngCvSVM.CvSVMLearner(),
            AZorngCvANN.CvANNLearner(),
            AZorngRF.RFLearner()
        ]
        classifiers = [l(self.getRegressionTrainingData()) for l in learners]

        # Act
        classifier = AZorngConsensus.ConsensusClassifier(
            classifiers=classifiers)

        # Assert
        self.assertNotEqual(classifier, None)
        self.assertEqual(len(classifier.classifiers), len(learners))
        self.assertNotEqual(classifier.basicStat, None)
        self.assertNotEqual(classifier.classVar, None)
        self.assertNotEqual(classifier.domain, None)
        self.assertEqual(classifier.expression, None)
        self.assertNotEqual(classifier.imputeData, None)
        #self.assertEqual(classifier.NTrainEx, len(trainingData))
        self.assertEqual(classifier.name, "Consensus classifier")
        self.assertNotEqual(classifier.varNames, None)
        self.assertEqual(classifier.verbose, 0)
        self.assertEqual(classifier.weights, None)
    def test_CreateCustomClassificationClassifierUsingTrainingData(self):
        """ Test the creation of custom classification Classifier by calling learner with training data. """

        # Arrange
        learners = {
            'a': AZorngCvSVM.CvSVMLearner(),
            'b': AZorngCvANN.CvANNLearner(),
            'c': AZorngRF.RFLearner()
        }
        expression = [
            "a == Iris-setosa -> Iris-setosa", "-> Iris-virginica"
        ]
        trainingData = self.getClassificationTrainingData()
        learner = AZorngConsensus.ConsensusLearner(learners=learners,
                                                   expression=expression)

        # Act
        classifier = learner(trainingData)

        # Assert
        self.assertNotEqual(classifier, None)
        self.assertEqual(len(classifier.classifiers), len(learners))
        self.assertNotEqual(classifier.basicStat, None)
        self.assertNotEqual(classifier.classVar, None)
        self.assertNotEqual(classifier.domain, None)
        self.assertEqual(classifier.expression, expression)
        self.assertNotEqual(classifier.imputeData, None)
        self.assertEqual(classifier.NTrainEx, len(trainingData))
        self.assertEqual(classifier.name, "Consensus classifier")
        self.assertNotEqual(classifier.varNames, None)
        self.assertEqual(classifier.verbose, 0)
        self.assertEqual(classifier.weights, None)
    def test_CreateDefaultClassifierUsingTrainingData(self):
        """ Test the creation of default Classifier by calling learner with training data. """

        # Arrange
        learners = [
            AZorngCvSVM.CvSVMLearner(),
            AZorngCvANN.CvANNLearner(),
            AZorngRF.RFLearner()
        ]
        trainingData = self.getRegressionTrainingData()
        learner = AZorngConsensus.ConsensusLearner(learners=learners)

        # Act
        classifier = learner(trainingData)

        # Assert
        self.assertNotEqual(classifier, None)
        self.assertEqual(len(classifier.classifiers), len(learners))
        self.assertEqual(classifier.expression, None)
        self.assertEqual(classifier.name, "Consensus classifier")
        self.assertEqual(classifier.verbose, 0)
        self.assertNotEqual(classifier.imputeData, None)
        self.assertEqual(classifier.NTrainEx, len(trainingData))
        self.assertNotEqual(classifier.basicStat, None)
        self.assertEqual(classifier.weights, None)
    def test_SaveLoadCustomRegressionExpression(self):
        """ Test save/load custom expression using average N regression with object map """
        # Arrange
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }

        # Construct expression learner/classifier
        regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3"
        expressionLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=regressionExpression)
        expressionClassifier = expressionLearner(self.DataReg)

        # Construct default learner/classifier
        result = []
        for ex in self.DataReg:
            result.append(expressionClassifier(ex))

        # Act
        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        expressionClassifier.write(os.path.join(scratchdir, "./CM.model"))

        resultLoaded = []
        loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        self.assertNotEqual(loaded, None)
        for ex in self.DataReg:
            resultLoaded.append(loaded(ex))

        # Assert
        for index, item in enumerate(result):
            if not float_compare(result[index].value,
                                 resultLoaded[index].value):
                print "Not equal on index: ", index
            self.assertEqual(
                float_compare(result[index].value, resultLoaded[index].value),
                True)

        self.assertEqual(len(loaded.domain), len(self.DataReg.domain))
        self.assertEqual(len(loaded.imputeData), len(loaded.domain))
        self.assertEqual(len(loaded.basicStat), len(loaded.domain))
        self.assertEqual(loaded.NTrainEx, len(self.DataReg))

        miscUtilities.removeDir(scratchdir)
    def test_SaveLoadCustomLogicalExpression(self):
        """ Test save/load functionality with a custom logical expression """
        # Arrange

        # Construct expression learner/classifier
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }
        discreteExpression = [
            "firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica"
        ]
        discreteLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=discreteExpression)
        discreteClassifier = discreteLearner(self.irisData)

        result = []
        for ex in self.irisData:
            result.append(discreteClassifier(ex))

        # Act
        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        discreteClassifier.write(os.path.join(scratchdir, "./CM.model"))

        resultLoaded = []
        loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        self.assertNotEqual(loaded, None)
        for ex in self.irisData:
            resultLoaded.append(loaded(ex))

        # Assert
        for index, item in enumerate(result):
            if not result[index].value == resultLoaded[index].value:
                print "Not equal on index: ", index
            self.assertEqual(result[index].value, resultLoaded[index].value)

        self.assertEqual(len(loaded.domain), len(self.irisData.domain))
        self.assertEqual(len(loaded.imputeData), len(loaded.domain))
        self.assertEqual(len(loaded.basicStat), len(loaded.domain))
        self.assertEqual(loaded.NTrainEx, len(self.irisData))

        miscUtilities.removeDir(scratchdir)
    def test_CanPersistRegressionModelUsingClassifiers(self):
        """Test the save/load for a regression model - Using average of N classifiers"""

        # Arrange
        learners = [
            AZorngRF.RFLearner(),
            AZorngCvSVM.CvSVMLearner(),
            AZorngCvANN.CvANNLearner()
        ]
        learner = AZorngConsensus.ConsensusLearner(learners=learners)
        classifier = learner(self.DataReg)

        # Act
        predictions = []
        for ex in self.DataReg:
            predictions.append(classifier(ex))

        scratchdir = miscUtilities.createScratchDir(
            desc="ConsensusSaveLoadTest")
        classifier.write(os.path.join(scratchdir, "./CM.model"))

        # Assert
        predictionsL = []
        Loaded = AZorngConsensus.Consensusread(
            os.path.join(scratchdir, "./CM.model"))
        for ex in self.DataReg:
            predictionsL.append(Loaded(ex))

        self.assertEqual(
            [round(pred.value, 4) for pred in predictions],
            [round(pred.value, 4) for pred in predictionsL],
            "Loaded model predictions differ: Pred. 1 (saved/loaded):" +
            str(predictions[0]) + " / " + str(predictionsL[0]))

        self.assertEqual(len(Loaded.domain), len(self.DataReg.domain))
        self.assertEqual(len(Loaded.imputeData), len(Loaded.domain))
        self.assertEqual(len(Loaded.basicStat), len(Loaded.domain))
        self.assertEqual(Loaded.NTrainEx, len(self.DataReg))

        miscUtilities.removeDir(scratchdir)
    def test_AverageNRegressionExpressionUsingObjMap(self):
        """ Test regular expression using average N regression with object map """
        # Arrange
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }

        # Construct expression learner/classifier
        regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3"
        expressionLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=regressionExpression)
        expressionClassifier = expressionLearner(self.DataReg)

        # Construct default learner/classifier
        defaultLearners = [
            AZorngRF.RFLearner(),
            AZorngCvANN.CvANNLearner(),
            AZorngCvSVM.CvSVMLearner()
        ]
        defaultLearner = AZorngConsensus.ConsensusLearner(
            learners=defaultLearners)
        defaultClassifier = defaultLearner(self.DataReg)

        # Act
        expressionPredictions = []
        for ex in self.DataReg:
            expressionPredictions.append(expressionClassifier(ex))

        defaultPredictions = []
        for ex in self.DataReg:
            defaultPredictions.append(defaultClassifier(ex))

        # Assert
        for index in range(len(expressionPredictions)):
            self.assertEqual(
                True,
                float_compare(expressionPredictions[index].value,
                              defaultPredictions[index].value))
    def test_CreateLearnerWithObjectMapping(self):
        """ Test the creation of learners with an object map """
        # Arrange
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }

        # Act
        learner = AZorngConsensus.ConsensusLearner(learners=learners)

        # Assert
        self.assertEqual(len(learner.learners), len(learners))
    def test_CustomRegressionExpressionUsingWeights(self):
        """ Test regression expression using weights """
        # Arrange
        learners = {
            'a': AZorngCvSVM.CvSVMLearner(),
            'b': AZorngCvANN.CvANNLearner(),
            'c': AZorngRF.RFLearner()
        }

        weights = {'a': lambda x: 1, 'b': lambda x: 2, 'c': lambda x: 3}

        regressionExpression = "(a + b + c) / 3"
        expressionLearner = AZorngConsensus.ConsensusLearner(
            learners=learners,
            expression=regressionExpression,
            weights=weights)
        classifier = expressionLearner(self.DataReg)

        # Act
        result = []
        for ex in self.DataReg:
            result.append(classifier(ex))

        verifiedResult = []
        for ex in self.DataReg:
            a_value = classifier.classifiers['a'](ex)
            a_weight_value = weights['a'](a_value)
            b_value = classifier.classifiers['b'](ex)
            b_weight_value = weights['b'](b_value)
            c_value = classifier.classifiers['c'](ex)
            c_weight_value = weights['c'](c_value)

            prediction = (a_value * a_weight_value + b_value * b_weight_value +
                          c_value * c_weight_value) / 3

            verifiedResult.append(prediction)

        # Assert
        for index, item in enumerate(result):
            if float_compare(result[index].value,
                             verifiedResult[index]) == False:
                print "Not equal on index: ", index
                print "Result: ", result[
                    index].value, " Verified: ", verifiedResult[index]
                print "Delta: ", abs(result[index].value -
                                     verifiedResult[index])
            self.assertEqual(
                float_compare(result[index].value, verifiedResult[index]),
                True)
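    # As the verification loop above suggests, supplying a weights map makes
    # each learner name in the expression evaluate to its prediction multiplied
    # by weights[name](prediction), so "(a + b + c) / 3" behaves like
    # (a*wa(a) + b*wb(b) + c*wc(c)) / 3, where wa/wb/wc are shorthand for the
    # lambdas in the weights dict, not API identifiers.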
    def test_CreateLearnerWithObjectMappingWithoutExpression(self):
        """ Test with name variable mapping defined but not expression given """
        # Arrange
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }

        learner = AZorngConsensus.ConsensusLearner(learners=learners)

        # Act
        classifier = learner(self.DataReg)

        # Assert
        self.assertEqual(classifier, None)
    def test_CreateDefaultModel(self):
        """ Test the creation of Consensus Model using no learners """

        # Arrange

        # Act
        learner = AZorngConsensus.ConsensusLearner()

        # Assert
        self.assertEqual(learner.learners, None)
        self.assertEqual(learner.expression, None)
        self.assertEqual(learner.name, "Consensus learner")
        self.assertEqual(learner.verbose, 0)
        self.assertEqual(learner.imputeData, None)
        self.assertEqual(learner.NTrainEx, 0)
        self.assertEqual(learner.basicStat, None)
        self.assertEqual(learner.weights, None)
    def test_CustomLogicalExpressionUsingOrAndStatement(self):
        """ Test logical expression using OR/AND statements """
        # Arrange

        # Construct verification learners
        a = AZorngCvSVM.CvSVMLearner()
        a = a(self.irisData)
        b = AZorngCvANN.CvANNLearner()
        b = b(self.irisData)
        c = AZorngRF.RFLearner()
        c = c(self.irisData)

        # Construct expression learner/classifier
        learners = {
            'a': AZorngCvSVM.CvSVMLearner(),
            'b': AZorngCvANN.CvANNLearner(),
            'c': AZorngRF.RFLearner()
        }
        discreteExpression = [
            "a == Iris-setosa and c == Iris-virginica or b == Iris-setosa -> Iris-setosa",
            "-> Iris-virginica"
        ]
        discreteLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=discreteExpression)
        discreteClassifier = discreteLearner(self.irisData)

        # Act
        result = []
        for ex in self.irisData:
            result.append(discreteClassifier(ex))

        verifiedResult = []
        for ex in self.irisData:
            if a(ex).value == "Iris-setosa" and c(
                    ex).value == "Iris-virginica" or b(
                        ex).value == "Iris-setosa":
                verifiedResult.append("Iris-setosa")
            else:
                verifiedResult.append("Iris-virginica")

        # Assert
        for index, item in enumerate(result):
            if not result[index].value == verifiedResult[index]:
                print "Not equal on index: ", index, " Predicted: ", result[
                    index].value, " Real: ", verifiedResult[index]
            self.assertEqual(result[index].value, verifiedResult[index])
    def test_InvalidCustomRegressionExpression(self):
        """ Test invalid custom expression """
        # Arrange
        learners = {
            'a': AZorngCvSVM.CvSVMLearner(),
            'b': AZorngCvANN.CvANNLearner(),
            'c': AZorngRF.RFLearner()
        }

        regressionExpression = "(a + b + 3cd45 + c) / 3"
        expressionLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=regressionExpression)

        # Act
        classifier = expressionLearner(self.DataReg)

        # Assert
        self.assertEqual(classifier(self.DataReg[0]), None)
    def test_InvalidCustomClassificationExpression(self):
        """ Test invalid custom expression """
        # Arrange
        learners = {
            'a': AZorngCvSVM.CvSVMLearner(),
            'b': AZorngCvANN.CvANNLearner(),
            'c': AZorngRF.RFLearner()
        }

        discreteExpression = [
            "a == Iris-setosa and or c == Iris-virginica or b == Iris-setosa -> Iris-setosa",
            "-> Iris-virginica"
        ]
        expressionLearner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=discreteExpression)

        # Act
        classifier = expressionLearner(self.getClassificationTrainingData())

        # Assert
        self.assertEqual(classifier(self.getClassificationTrainingData()[0]),
                         None)
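    # Both invalid-expression tests above rely on the same apparent contract:
    # a malformed expression does not raise at training time; the classifier
    # is still built, and the error surfaces as a None prediction:
    #
    #   classifier = expressionLearner(trainingData)   # training still succeeds
    #   assert classifier(trainingData[0]) is None     # prediction returns None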
    def test_CanCreateClassifierUsingObjMapping(self):
        """ Test with name variable mapping defined but not expression given """
        # Arrange
        learners = {
            'firstLearner': AZorngCvSVM.CvSVMLearner(),
            'secondLearner': AZorngCvANN.CvANNLearner(),
            'thirdLearner': AZorngRF.RFLearner()
        }

        discreteExpression = ""
        regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 2"

        learner = AZorngConsensus.ConsensusLearner(
            learners=learners, expression=regressionExpression)

        # Act
        classifier = learner(self.DataReg)

        # Assert
        self.assertNotEqual(classifier, None)
        self.assertEqual(len(classifier.classifiers), 3)
        self.assertEqual(classifier.expression, regressionExpression)
    def test_CreateModelWithLearnerList(self):
        """ Test the creation of Consensus Model using list of learners """

        # Arrange
        learners = [
            AZorngCvSVM.CvSVMLearner(),
            AZorngCvANN.CvANNLearner(),
            AZorngRF.RFLearner()
        ]

        # Act
        learner = AZorngConsensus.ConsensusLearner(learners=learners)

        # Assert
        for i, l in enumerate(learner.learners):
            self.assertEqual(learner.learners[i], learners[i])

        self.assertEqual(learner.expression, None)
        self.assertEqual(learner.name, "Consensus learner")
        self.assertEqual(learner.verbose, 0)
        self.assertEqual(learner.imputeData, None)
        self.assertEqual(learner.NTrainEx, 0)
        self.assertEqual(learner.basicStat, None)
        self.assertEqual(learner.weights, None)
def modelRead(modelFile=None, verbose=0, retrunClassifier=True):
    """Get the type of model saved in 'modelPath' and loads the respective model
       Returns the Classifier saved in the respective model path
       If called without parameters, it returns a list of known classifier types
       It can returns the classifier, or just a string with the Type

            modelRead (modelFile [, verbose = 0] [, retrunClassifier = True] )"""

    if not modelFile:
        return ("SignSVM", "CvSVM", "CvANN", "PLS", "CvRF", "CvBoost",
                "CvBayes", "Consensus")

    modelType = None
    loadedModel = None
    if os.path.isfile(os.path.join(modelFile, "model.svm")):
        modelType = "CvSVM"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvSVM
        loadedModel = AZorngCvSVM.CvSVMread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "model.SignSvm")):
        modelType = "SignSVM"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngSignSVM
        loadedModel = AZorngSignSVM.SignSVMread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.ann")):
        modelType = "CvANN"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvANN
        loadedModel = AZorngCvANN.CvANNread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "Model.pls")):
        modelType = "PLS"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngPLS
        loadedModel = AZorngPLS.PLSread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.rf")):
        modelType = "RF"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngRF
        loadedModel = AZorngRF.RFread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "C0.model")):
        modelType = "Consensus"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngConsensus
        loadedModel = AZorngConsensus.Consensusread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.boost")):
        modelType = "CvBoost"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvBoost
        loadedModel = AZorngCvBoost.CvBoostread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.bayes")):
        modelType = "CvBayes"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvBayes
        loadedModel = AZorngCvBayes.CvBayesread(modelFile, verbose)
    else:  # Assuming an RF old format for backcompatibility
        try:
            if os.path.isdir(modelFile):
                modelType = "RF"
                if not retrunClassifier: return modelType
                from trainingMethods import AZorngRF
                loadedModel = AZorngRF.RFread(modelFile, verbose)
            else:
                modelType = None
                loadedModel = None
        except:
            modelType = None
            loadedModel = None

    return loadedModel
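# A minimal usage sketch for modelRead; the model path below is illustrative,
# not taken from the source:
#
#   knownTypes = modelRead()                                            # tuple of known classifier types
#   modelType = modelRead("/path/to/CM.model", retrunClassifier=False)  # e.g. "Consensus"
#   classifier = modelRead("/path/to/CM.model")                         # loaded classifier, or None on failure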
def buildConsensus(trainData, learners, MLMethods, logFile=None):
    log(
        logFile, "Building a consensus model based on optimized MLmethods: " +
        str([ml for ml in MLMethods]) + "...")
    if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
        #Expression: If  CAavg_{POS} ge CAavg_{NEG} -> POS  else -> NEG
        #    where CAavg_{POS} is the average of classification accuracies of all models predicting POS.
        CLASS0 = str(trainData.domain.classVar.values[0])
        CLASS1 = str(trainData.domain.classVar.values[1])
        #exprTest0
        exprTest0 = "(0"
        for ml in MLMethods:
            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(
                MLMethods[ml]["optAcc"]) + " "
        exprTest0 += ")/IF0(sum([False"
        for ml in MLMethods:
            exprTest0 += ", " + ml + " == " + CLASS0 + " "
        exprTest0 += "]),1)"
        # exprTest1
        exprTest1 = "(0"
        for ml in MLMethods:
            exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str(
                MLMethods[ml]["optAcc"]) + " "
        exprTest1 += ")/IF0(sum([False"
        for ml in MLMethods:
            exprTest1 += ", " + ml + " == " + CLASS1 + " "
        exprTest1 += "]),1)"
        # expression
        expression = [
            exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1
        ]
    else:
        Q2sum = sum([MLMethods[ml]["optAcc"] for ml in MLMethods])
        expression = "(1 / " + str(Q2sum) + ") * (0"
        for ml in MLMethods:
            expression += " + " + str(
                MLMethods[ml]["optAcc"]) + " * " + ml + " "
        expression += ")"

    consensusLearners = {}
    for learnerName in learners:
        consensusLearners[learnerName] = learners[learnerName]

    learner = AZorngConsensus.ConsensusLearner(learners=consensusLearners,
                                               expression=expression)
    log(logFile, "  Training Consensus Learner")
    smilesAttr = dataUtilities.getSMILESAttr(trainData)
    if smilesAttr:
        log(logFile, "Found SMILES attribute:" + smilesAttr)
        if learner.specialType == 1:
            trainData = dataUtilities.attributeSelectionData(
                trainData, [smilesAttr, trainData.domain.classVar.name])
            log(
                logFile, "Selected attrs: " +
                str([attr.name for attr in trainData.domain]))
        else:
            trainData = dataUtilities.attributeDeselectionData(
                trainData, [smilesAttr])
            log(logFile,"Selected attrs: "+str([attr.name for attr in trainData.domain[0:3]] + ["..."] +\
                                           [attr.name for attr in trainData.domain[len(trainData.domain)-3:]]))

    return learner(trainData)
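# A worked example of the expression buildConsensus generates, assuming a
# hypothetical MLMethods = {"RF": {"optAcc": 0.9}, "CvANN": {"optAcc": 0.8}},
# classes POS/NEG, and iteration order RF, CvANN. Each class score is an
# accuracy-weighted average over the methods voting for it, with IF0 guarding
# against division by zero when no method votes for that class:
#
#   exprTest0 = "(0+( RF == POS )*0.9 +( CvANN == POS )*0.8 )/IF0(sum([False, RF == POS , CvANN == POS ]),1)"
#   expression = [exprTest0 + " >= " + exprTest1 + " -> POS", " -> NEG"]
#
# In the regression branch the expression is an optAcc-weighted average,
# normalized by the sum of the optAcc values (Q2sum = 1.7 here):
#
#   expression = "(1 / 1.7) * (0 + 0.9 * RF  + 0.8 * CvANN )"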
    def getAcc(self, callBack=None, callBackWithFoldModel=None):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None
        # Set the response type
        self.responseType = self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
        self.__log("  " + str(self.responseType))

        #Create the Train and test sets
        if self.usePreDefFolds:
            DataIdxs = self.preDefIndices
        else:
            DataIdxs = self.sampler(self.data, self.nExtFolds)
        foldsN = [f for f in dict.fromkeys(DataIdxs)
                  if f != 0]  # Folds used only from 1 on; fold 0 is for the fixed train bias
        nFolds = len(foldsN)
        # Fix the indexes based on DataIdxs:
        #   0 represents the train set; >= 1 represents the test set folds
        if self.useVarCtrlCV:
            nShifted = [0] * nFolds
            for idx, isTest in enumerate(
                    self.preDefIndices
            ):  # self.preDefIndices == 0 are to be used in TrainBias
                if not isTest:
                    if DataIdxs[idx]:
                        nShifted[DataIdxs[idx]] += 1
                        DataIdxs[idx] = 0
            for idx, shift in enumerate(nShifted):
                self.__log("In fold " + str(idx) + ", " + str(shift) +
                           " examples were shifted to the train set.")

        # Vars for saving each fold's result
        optAcc = {}
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}

        #Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models = {}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  " + str([x for x in MLmethods]))

        # Check data in advance so that it will not, by chance, fail at the last fold!
        for foldN in foldsN:
            trainData = self.data.select(DataIdxs, foldN, negate=1)
            self.__checkTrainData(trainData)

        # Optional: order learners so that PLS is first
        sortedML = [ml for ml in MLmethods]
        if "PLS" in sortedML:
            sortedML.remove("PLS")
            sortedML.insert(0, "PLS")

        stepsDone = 0
        nTotalSteps = len(sortedML) * self.nExtFolds
        for ml in sortedML:
            startTime = time.time()
            self.__log("    > " + str(ml) + "...")
            try:
                # Vars for saving each fold's result
                results[ml] = []
                exp_pred[ml] = []
                models[ml] = []
                nTrainEx[ml] = []
                nTestEx[ml] = []
                optAcc[ml] = []
                logTxt = ""
                for foldN in foldsN:
                    if type(self.learner) == dict:
                        self.paramList = None

                    trainData = self.data.select(DataIdxs, foldN, negate=1)
                    testData = self.data.select(DataIdxs, foldN)
                    smilesAttr = dataUtilities.getSMILESAttr(trainData)
                    if smilesAttr:
                        self.__log("Found SMILES attribute:" + smilesAttr)
                        if MLmethods[ml].specialType == 1:
                            trainData = dataUtilities.attributeSelectionData(
                                trainData,
                                [smilesAttr, trainData.domain.classVar.name])
                            testData = dataUtilities.attributeSelectionData(
                                testData,
                                [smilesAttr, testData.domain.classVar.name])
                            self.__log(
                                "Selected attrs: " +
                                str([attr.name for attr in trainData.domain]))
                        else:
                            trainData = dataUtilities.attributeDeselectionData(
                                trainData, [smilesAttr])
                            testData = dataUtilities.attributeDeselectionData(
                                testData, [smilesAttr])
                            self.__log("Selected attrs: " + str(
                                [attr.name for attr in trainData.domain[0:3]] +
                                ["..."] + [
                                    attr.name for attr in trainData.
                                    domain[len(trainData.domain) - 3:]
                                ]))

                    nTrainEx[ml].append(len(trainData))
                    nTestEx[ml].append(len(testData))
                    # Test if train sets inside the optimizer will respect dataSize criteria.
                    #  if not, don't optimize, but still train the model
                    dontOptimize = False
                    if self.responseType != "Classification" and (
                            len(trainData) *
                        (1 - 1.0 / self.nInnerFolds) < 20):
                        dontOptimize = True
                    else:
                        tmpDataIdxs = self.sampler(trainData, self.nInnerFolds)
                        tmpTrainData = trainData.select(tmpDataIdxs,
                                                        1,
                                                        negate=1)
                        if not self.__checkTrainData(tmpTrainData, False):
                            dontOptimize = True

                    SpecialModel = None
                    if dontOptimize:
                        logTxt += "       Fold " + str(
                            foldN
                        ) + ": Too few compounds to optimize model hyper-parameters\n"
                        self.__log(logTxt)
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            res = evalUtilities.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                stratified=orange.MakeRandomIndices.
                                StratifiedIfPossible,
                                random_generator=random.randint(0, 100))
                            CA = evalUtilities.CA(res)[0]
                            optAcc[ml].append(CA)
                        else:
                            res = evalUtilities.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                stratified=orange.MakeRandomIndices.
                                StratifiedIfPossible,
                                random_generator=random.randint(0, 100))
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                    else:
                        if MLmethods[ml].specialType == 1:
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                optInfo, SpecialModel = MLmethods[
                                    ml].optimizePars(trainData, folds=5)
                                optAcc[ml].append(optInfo["Acc"])
                            else:
                                res = evalUtilities.crossValidation(
                                    [MLmethods[ml]],
                                    trainData,
                                    folds=5,
                                    stratified=orange.MakeRandomIndices.
                                    StratifiedIfPossible,
                                    random_generator=random.randint(0, 100))
                                R2 = evalUtilities.R2(res)[0]
                                optAcc[ml].append(R2)
                        else:
                            runPath = miscUtilities.createScratchDir(
                                baseDir=AZOC.NFS_SCRATCHDIR,
                                desc="AccWOptParam",
                                seed=id(trainData))
                            trainData.save(
                                os.path.join(runPath, "trainData.tab"))
                            tunedPars = paramOptUtilities.getOptParam(
                                learner=MLmethods[ml],
                                trainDataFile=os.path.join(
                                    runPath, "trainData.tab"),
                                paramList=self.paramList,
                                useGrid=False,
                                verbose=self.verbose,
                                queueType=self.queueType,
                                runPath=runPath,
                                nExtFolds=None,
                                nFolds=self.nInnerFolds,
                                logFile=self.logFile,
                                getTunedPars=True,
                                fixedParams=self.fixedParams)
                            if not MLmethods[ml] or not MLmethods[ml].optimized:
                                self.__log(
                                    "       WARNING: GETACCWOPTPARAM: The learner "
                                    + str(ml) + " was not optimized.")
                                self.__log(
                                    "                It will be ignored")
                                #self.__log("                It will be set to default parameters")
                                self.__log(
                                    "                    DEBUG can be done in: "
                                    + runPath)
                                #Set learner back to default
                                #MLmethods[ml] = MLmethods[ml].__class__()
                                raise Exception("The learner " + str(ml) +
                                                " was not optimized.")
                            else:
                                if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                    optAcc[ml].append(tunedPars[0])
                                else:
                                    res = evalUtilities.crossValidation(
                                        [MLmethods[ml]],
                                        trainData,
                                        folds=5,
                                        stratified=orange.MakeRandomIndices.
                                        StratifiedIfPossible,
                                        random_generator=random.randint(
                                            0, 100))
                                    R2 = evalUtilities.R2(res)[0]
                                    optAcc[ml].append(R2)

                                miscUtilities.removeDir(runPath)
                    #Train the model
                    if SpecialModel is not None:
                        model = SpecialModel
                    else:
                        model = MLmethods[ml](trainData)
                    models[ml].append(model)
                    #Test the model
                    if self.responseType == "Classification":
                        results[ml].append(
                            (evalUtilities.getClassificationAccuracy(
                                testData, model),
                             evalUtilities.getConfMat(testData, model)))
                    else:
                        local_exp_pred = []
                        # Predict using bulk-predict
                        predictions = model(testData)
                        # Gather predictions
                        for n, ex in enumerate(testData):
                            local_exp_pred.append(
                                (ex.getclass().value, predictions[n].value))
                        results[ml].append(
                            (evalUtilities.calcRMSE(local_exp_pred),
                             evalUtilities.calcRsqrt(local_exp_pred)))
                        # Save the experimental value and corresponding predicted value
                        exp_pred[ml] += local_exp_pred
                    if callBack:
                        stepsDone += 1
                        if not callBack((100 * stepsDone) / nTotalSteps):
                            return None
                    if callBackWithFoldModel:
                        callBackWithFoldModel(model)

                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    labels=hasattr(self.data.domain.classVar, "values")
                    and list(self.data.domain.classVar.values) or None)
                if self.verbose > 0:
                    print "UnbiasedAccuracyGetter!Results  " + ml + ":\n"
                    pprint(res)
                if not res:
                    raise Exception("No results available!")
                res["runningTime"] = time.time() - startTime
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)
                self.__log("       OK")
            except:
                self.__log("       Learner " + str(ml) +
                           " failed to create/optimize the model!")
                error = str(sys.exc_info()[0]) +" "+\
                            str(sys.exc_info()[1]) +" "+\
                            str(traceback.extract_tb(sys.exc_info()[2]))
                self.__log(error)

                res = self.createStatObj()
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            # We still need to build a consensus model out of the stable models,
            #   but ONLY if more than one model is stable!
            #   When only one or no models are stable, build a consensus based on all models.
            # ALWAYS exclude specialType models (MLmethods[ml].specialType > 0)
            consensusMLs = {}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None and statistics[modelName][
                        "stable"]:
                    consensusMLs[modelName] = copy.deepcopy(
                        statistics[modelName])

            self.__log("Found " + str(len(consensusMLs)) +
                       " stable MLmethods out of " + str(len(statistics)) +
                       " MLmethods.")

            if len(consensusMLs) <= 1:  # we need more models to build a consensus!
                consensusMLs = {}
                for modelName in statistics:
                    consensusMLs[modelName] = copy.deepcopy(
                        statistics[modelName])

            # Exclude specialType models
            excludeThis = []
            for learnerName in consensusMLs:
                if models[learnerName][0].specialType > 0:
                    excludeThis.append(learnerName)
            for learnerName in excludeThis:
                consensusMLs.pop(learnerName)
                self.__log("    > Excluded special model " + learnerName)
            self.__log("    > Stable modules: " + str(consensusMLs.keys()))

            if len(consensusMLs) >= 2:
                # Vars for saving each fold's result
                startTime = time.time()
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log(
                    "Calculating the statistics for a Consensus model based on "
                    + str([ml for ml in consensusMLs]))
                for foldN in range(self.nExtFolds):
                    if self.responseType == "Classification":
                        CLASS0 = str(self.data.domain.classVar.values[0])
                        CLASS1 = str(self.data.domain.classVar.values[1])
                        # exprTest0
                        exprTest0 = "(0"
                        for ml in consensusMLs:
                            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(
                                optAcc[ml][foldN]) + " "
                        exprTest0 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest0 += ", " + ml + " == " + CLASS0 + " "
                        exprTest0 += "]),1)"
                        # exprTest1
                        exprTest1 = "(0"
                        for ml in consensusMLs:
                            exprTest1 += "+( " + ml + " == " + CLASS1 + " )*" + str(
                                optAcc[ml][foldN]) + " "
                        exprTest1 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest1 += ", " + ml + " == " + CLASS1 + " "
                        exprTest1 += "]),1)"
                        # Expression
                        expression = [
                            exprTest0 + " >= " + exprTest1 + " -> " + CLASS0,
                            " -> " + CLASS1
                        ]
                    else:
                        Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                        expression = "(1 / " + str(Q2sum) + ") * (0"
                        for ml in consensusMLs:
                            expression += " + " + str(
                                optAcc[ml][foldN]) + " * " + ml + " "
                        expression += ")"

                    testData = self.data.select(
                        DataIdxs, foldN + 1)  # fold 0 is for the train bias!
                    smilesAttr = dataUtilities.getSMILESAttr(testData)
                    if smilesAttr:
                        self.__log("Found SMILES attribute:" + smilesAttr)
                        testData = dataUtilities.attributeDeselectionData(
                            testData, [smilesAttr])
                        self.__log("Selected attrs: " + str(
                            [attr.name
                             for attr in trainData.domain[0:3]] + ["..."] + [
                                 attr.name for attr in
                                 trainData.domain[len(trainData.domain) - 3:]
                             ]))

                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in consensusMLs:
                        consensusClassifiers[learnerName] = models[
                            learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(
                        classifiers=consensusClassifiers,
                        expression=expression)
                    CnTrainEx.append(model.NTrainEx)
                    #Test the model
                    if self.responseType == "Classification":
                        Cresults.append(
                            (evalUtilities.getClassificationAccuracy(
                                testData, model),
                             evalUtilities.getConfMat(testData, model)))
                    else:
                        local_exp_pred = []
                        # Predict using bulk-predict
                        predictions = model(testData)
                        # Gather predictions
                        for n, ex in enumerate(testData):
                            local_exp_pred.append(
                                (ex.getclass().value, predictions[n].value))
                        Cresults.append(
                            (evalUtilities.calcRMSE(local_exp_pred),
                             evalUtilities.calcRsqrt(local_exp_pred)))
                        # Save the experimental value and corresponding predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(
                    Cresults,
                    Cexp_pred,
                    CnTrainEx,
                    CnTestEx,
                    self.responseType,
                    self.nExtFolds,
                    labels=hasattr(self.data.domain.classVar, "values")
                    and list(self.data.domain.classVar.values) or None)
                res["runningTime"] = time.time() - startTime
                statistics["Consensus"] = copy.deepcopy(res)
                statistics["Consensus"][
                    "IndividualStatistics"] = copy.deepcopy(consensusMLs)
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics

        #By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]