def test_SavedModel(self):
    """Test that a saved Bayes model gives the same predictions as before saving."""
    # Create a Bayes model
    Bayes = AZorngCvBayes.CvBayesLearner(self.train_data)
    # Calculate classification accuracy
    Acc = evalUtilities.getClassificationAccuracy(self.test_data, Bayes)
    # Save the model
    scratchdir = os.path.join(AZOC.SCRATCHDIR, "scratchdir" + str(time.time()))
    os.mkdir(scratchdir)
    modelPath = os.path.join(scratchdir, "Bayes.fBayes")
    Bayes.write(modelPath)
    # Read in the model
    Bayes = AZorngCvBayes.CvBayesread(modelPath)
    # Calculate classification accuracy
    savedAcc = evalUtilities.getClassificationAccuracy(self.test_data, Bayes)
    # Test that the accuracies of the two classifiers are exactly the same
    self.assertEqual(Acc, savedAcc)
    # Test using the global read functionality
    Bayes2 = AZBaseClasses.modelRead(modelPath)
    savedAcc2 = evalUtilities.getClassificationAccuracy(self.test_data, Bayes2)
    self.assertEqual(Acc, savedAcc2)
    # Remove the scratch directory
    os.system("/bin/rm -rf " + scratchdir)
def test_TwoWays(self):
    """Test that a Bayes model created in one or two steps gives the same results."""
    # Deviation allowed in accuracy
    devAllowed = 0.4  # Before: 0.02
    # One-step Bayes creation
    Bayes = AZorngCvBayes.CvBayesLearner(self.train_data)
    # Calculate classification accuracy for the classifier trained in one step
    oneStepAcc = evalUtilities.getClassificationAccuracy(self.test_data, Bayes)
    # Two-step Bayes creation
    learner = AZorngCvBayes.CvBayesLearner()
    Bayes = learner(self.train_data)
    # Calculate classification accuracy for the classifier trained in two steps
    twoStepAcc = evalUtilities.getClassificationAccuracy(self.test_data, Bayes)
    # Test that the accuracies of the classifiers created in the two ways agree within the allowed deviation
    self.assert_(
        abs(oneStepAcc - twoStepAcc) <= devAllowed,
        "Dev=" + str(oneStepAcc - twoStepAcc))
def test_MetaDataHandleForSavingModel(self):
    """Test the handling of SaveModel for data with meta attributes."""
    expectedAccWMeta = [0.733333333, 0.83333333300000001]
    expectedAccNoMeta = [0.55151515200000001]
    # Test saving a model created from train data with meta attributes
    self.assert_(
        len(self.WMetaTest.domain.getmetas()) >= 1,
        "The dataset WMetaTest should have meta attributes")
    CvBayeslearner = AZorngCvBayes.CvBayesLearner()
    BayesM = CvBayeslearner(self.WMetaTest)
    AccNoMetaBefore = evalUtilities.getClassificationAccuracy(self.NoMetaTrain, BayesM)
    AccWMetaBefore = evalUtilities.getClassificationAccuracy(self.WMetaTest, BayesM)
    # Save the model
    scratchdir = os.path.join(AZOC.SCRATCHDIR, "scratchdiriTest" + str(time.time()))
    os.mkdir(scratchdir)
    modelPath = os.path.join(scratchdir, "CvBayesModel.CvBayes")
    BayesM.write(modelPath)
    # Read in the model
    BayesR = AZorngCvBayes.CvBayesread(modelPath)
    self.assert_(
        len(BayesR.imputer.defaults.domain.getmetas()) == 0,
        "There shouldn't be any meta data now!")
    # Calculate classification accuracy
    AccNoMetaAfter = evalUtilities.getClassificationAccuracy(self.NoMetaTrain, BayesR)
    AccWMetaAfter = evalUtilities.getClassificationAccuracy(self.WMetaTest, BayesR)
    # Test that the accuracy of the model is the same before and after saving
    self.assertEqual(
        AccNoMetaBefore, AccNoMetaAfter,
        "NoMeta: Predictions after loading saved model were different")
    self.assertEqual(
        AccWMetaBefore, AccWMetaAfter,
        "WMeta: Predictions after loading saved model were different")
    self.assert_(round(AccWMetaAfter, 5) in [round(x, 5) for x in expectedAccWMeta])
    self.assert_(round(AccNoMetaAfter, 5) in [round(x, 5) for x in expectedAccNoMeta])
    # Remove the scratch directory
    os.system("/bin/rm -rf " + scratchdir)
def TopVarImportanceTest(data, expectNone=False):
    """Check that getTopImportantVars returns the same results before and after a model is saved and loaded."""
    resA = []
    resB = []
    CvBayes = AZorngCvBayes.CvBayesLearner(data)
    for ex in data:
        resA.append(CvBayes.getTopImportantVars(ex, 1))
    scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
    modelPath = os.path.join(scratchdir, "CvBayesModel")
    CvBayes.write(modelPath)
    LoadedCvBayes = AZorngCvBayes.CvBayesread(modelPath)
    miscUtilities.removeDir(scratchdir)
    for ex in data:
        resB.append(LoadedCvBayes.getTopImportantVars(ex, 1))
    if expectNone:
        return resA == resB == [None] * len(data)
    else:
        return resA == resB and None not in resA and resA.count(resA[0]) != len(resA)
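# A minimal usage sketch for the helper above (hypothetical test method name;
# it assumes a test case exposing a 'self.train_data' table like the ones used
# elsewhere in this suite, and relies on the helper returning a bool):
#
#   def test_TopVarImportance(self):
#       self.assert_(TopVarImportanceTest(self.train_data),
#                    "Top important vars changed after save/load")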
def test_PredictionWithDiffVarOrder(self):
    """Test the prediction of examples with a different variable order."""
    expectedAcc = [0.33333333300000001, 0.666666667]
    # Create a Bayes model
    CvBayeslearner = AZorngCvBayes.CvBayesLearner()
    Bayes = CvBayeslearner(self.noBadDataTrain)
    Acc1 = evalUtilities.getClassificationAccuracy(self.noBadDataTest, Bayes)
    Acc2 = evalUtilities.getClassificationAccuracy(self.badVarOrderData, Bayes)
    self.assertEqual(Acc1, Acc2)
    self.assert_(round(Acc1, 5) in [round(x, 5) for x in expectedAcc])
def test_ImputeTrain(self):
    """Assure that imputation works for the Bayes models. Test on data with missing values.
    This test just assures that the model is trained. The correct imputation test is made in testImpute."""
    expectedAcc = 0.41818
    devAllowed = 0.3
    BayesLearner = AZorngCvBayes.CvBayesLearner()
    Bayes = BayesLearner(self.missingTrain)
    Acc = evalUtilities.getClassificationAccuracy(self.missingTest, Bayes)
    self.assert_(
        abs(expectedAcc - Acc) <= devAllowed,
        "Dev=" + str(expectedAcc - Acc))
def test_PersistentClassAcc(self):
    """Assure that the accuracy is preserved for models trained in the same way."""
    # Deviation allowed
    devAllowed = 0.5
    ExpectedAcc = 0.5
    # One-step Bayes creation
    Bayes = AZorngCvBayes.CvBayesLearner(self.train_data)
    # Calculate classification accuracy for the classifier trained in one step
    oneStepAcc = evalUtilities.getClassificationAccuracy(self.test_data, Bayes)
    # Check that the accuracy is what it used to be
    print oneStepAcc
    self.assert_(
        abs(oneStepAcc - ExpectedAcc) <= devAllowed,
        "Dev=" + str(oneStepAcc - ExpectedAcc))
def test_MetaDataHandle(self):
    """Test the handling of data with meta attributes."""
    expectedAcc = 0.33333333300000001  # [0.666666667, 0.333333333]
    # Create a Bayes model
    CvBayeslearner = AZorngCvBayes.CvBayesLearner()
    Bayes = CvBayeslearner(self.NoMetaTrain)
    # Calculate classification accuracy (NoMetaTest and WMetaTest are the same apart from the meta attribute)
    AccNoMeta = evalUtilities.getClassificationAccuracy(self.NoMetaTest, Bayes)
    AccWMeta = evalUtilities.getClassificationAccuracy(self.WMetaTest, Bayes)
    self.assertEqual(
        AccNoMeta, AccWMeta,
        "Predictions with and without meta data were different!")
    self.assertEqual(round(AccNoMeta, 9), round(expectedAcc, 9))
def test_PredictionWithIncompatibleDomain(self):
    """Test the non-prediction of examples with an incompatible domain."""
    expectedAcc1 = 0.33333333300000001
    # Create a Bayes model
    CvBayeslearner = AZorngCvBayes.CvBayesLearner()
    Bayes = CvBayeslearner(self.noBadDataTrain)
    Acc1 = evalUtilities.getClassificationAccuracy(self.noBadDataTest, Bayes)
    self.assertEqual(round(Acc1, 9), round(expectedAcc1, 9))
    # Examples with a wrong variable type can still be fixed and predicted
    self.assertEqual(Bayes(self.badVarTypeData[0]), "POS",
                     "This example could still be predicted")
    self.assertEqual(Bayes(self.badVarTypeData[1]), "POS",
                     "This example could still be predicted")
    # Examples with a wrong variable name or count cannot be predicted
    self.assertEqual(Bayes(self.badVarNameData[0]), None,
                     "This example should NOT be predicted")
    self.assertEqual(Bayes(self.badVarCountData[0]), None,
                     "This example should NOT be predicted")
def test_Probabilities(self):
    """Test if the returned probabilities are OK, although fake."""
    CvBayes = AZorngCvBayes.CvBayesLearner(self.LdataTrain)
    res = []
    for idx, ex in enumerate(self.LdataTest):
        res.append(CvBayes(ex, resultType=orange.GetProbabilities))
        self.assert_(
            res[-1][0] >= 0 and res[-1][0] <= 1,
            "Example " + str(idx) + " has impossible probability:" + str(res[-1]))
        self.assert_(
            res[-1][1] >= 0 and res[-1][1] <= 1,
            "Example " + str(idx) + " has impossible probability:" + str(res[-1]))
        #self.assertEqual(CvBayes.isRealProb(), True, "Example " + str(idx) + " did not return real probability")
        self.assertEqual(
            round(sum(res[-1]), 5), 1,
            "Probabilities of example " + str(idx) + " did not sum to 1:" + str(res[-1]))
    sum0 = sum([x[0] for x in res])
    sum1 = sum([x[1] for x in res])
    self.assertEqual(len(self.LdataTest), round(sum0 + sum1, 5))
def test_PredictionWithDiffVarType(self):
    """Test the prediction of examples with a different variable type."""
    expectedAcc = 0.37036999999999998
    # Create a Bayes model
    CvBayeslearner = AZorngCvBayes.CvBayesLearner()
    Bayes = CvBayeslearner(self.noBadDataTrain)
    # Use from index 3 to the end of the data, because we know that examples 0 to 2 are not compatible
    Acc2 = evalUtilities.getClassificationAccuracy(self.noBadDataTest[3:], Bayes)
    Acc1 = evalUtilities.getClassificationAccuracy(self.badVarTypeData[3:], Bayes)
    self.assertEqual(round(Acc1, 6), round(expectedAcc, 6))
    self.assertEqual(round(Acc2, 6), round(expectedAcc, 6))
    self.assert_(
        ('Fixed Types of variables' in Bayes.examplesFixedLog) and
        (Bayes.examplesFixedLog['Fixed Types of variables'] == 27),
        "No report of fixing in classifier class")
    # Note: the original had the error message inside the comparison, producing an
    # always-true tuple; it is now passed as the assert message instead.
    self.assert_(
        ('Vars needing type fix' in Bayes.examplesFixedLog) and
        (Bayes.examplesFixedLog['Vars needing type fix']['[Br]([C])'] ==
         "EnumVariable to FloatVariable"),
        "No report of fixing in classifier class")
def no_test_Impute(self):  # Bayes cannot deal with regression
    """Test missing values imputation.
    Assure that imputation works for the Bayes models. Test on data with missing values."""
    # This data is loaded here to speed up the test suite, since it is too big
    contTestDataPath = os.path.join(AZOC.AZORANGEHOME, "tests/source/data/linearTest.tab")
    contTrainDataPath = os.path.join(AZOC.AZORANGEHOME, "tests/source/data/linearTrain.tab")
    contTrain = dataUtilities.DataTable(contTrainDataPath)
    contTest = dataUtilities.DataTable(contTestDataPath)
    ex1 = contTest[5]
    ex2 = contTest[6]
    self.assert_(ex1["Desc 71"] != "?", "The var Desc 71 shouldn't be missing!")
    self.assert_(ex2["Desc 138"] != "?", "The var Desc 138 shouldn't be missing!")
    imputer = orange.ImputerConstructor_average(contTrain)
    CvBayeslearner = AZorngCvBayes.CvBayesLearner()
    Bayes = CvBayeslearner(contTrain)
    # Prediction for the data as it is
    P1 = Bayes(ex1)
    P2 = Bayes(ex2)
    # Predictions changing one continuous and one discrete variable to 0
    ex1["Desc 71"] = 0
    ex2["Desc 138"] = 0
    P1_0 = Bayes(ex1)
    P2_0 = Bayes(ex2)
    # Predictions changing the same continuous and discrete variable to its corresponding imputation value
    ex1["Desc 71"] = imputer.defaults["Desc 71"]
    ex2["Desc 138"] = imputer.defaults["Desc 138"]
    P1_imp = Bayes(ex1)
    P2_imp = Bayes(ex2)
    # Predictions changing the same continuous and discrete variable to '?', which means the same
    # imputation as in the last case will have to be made inside the classifier, so the predicted
    # value must be the same
    ex1["Desc 71"] = "?"
    ex2["Desc 138"] = "?"
    self.assert_(ex1["Desc 71"] == "?", "The var Desc 71 should be missing now!")
    self.assert_(ex2["Desc 138"] == "?", "The var Desc 138 should be missing now!")
    P1Miss = Bayes(ex1)
    P2Miss = Bayes(ex2)
    # Test if the prediction made for the example with a missing value is the same as the one made
    # for the example whose missing values were substituted using the same method the classifier uses.
    self.assert_(P1_imp == P1Miss, "Imputation was not made correctly inside the classifier")
    #self.assert_(P2_imp == P2Miss, "Imputation was not made correctly inside the classifier")
    # Assure that if other substitutions on those variables were made, the predicted value would be
    # different, and so this is a valid method for testing the imputation
    self.assert_(P1.value != P2.value)  # Just to assure that we are not comparing equal examples
    self.assert_(
        P1.value != P1_imp.value,
        "The imputed 1 was the same as the original ... try another example")
    self.assert_(
        P1_0.value != P1_imp.value,
        "The imputed 1 was the same as the one replaced by 0. The classifier may be replacing missing values by 0")
    self.assert_(
        P2.value != P2Miss.value,
        "The missing imputed 2 was the same as the original ... try another example")
    self.assert_(
        P2_0.value != P2Miss.value,
        "The missing imputed 2 was the same as the one replaced by 0. The classifier may be replacing missing values by 0")
    # Test the imputer for saved models
    # Save the model
    scratchdir = os.path.join(AZOC.SCRATCHDIR, "scratchdir" + str(time.time()))
    os.mkdir(scratchdir)
    modelPath = os.path.join(scratchdir, "CvBayesModel")
    Bayes.write(modelPath)
    # Read in the model
    BayesM = AZorngCvBayes.CvBayesread(modelPath)
    # Predict ex1 and ex2, which are still the examples with missing values '?'
    self.assert_(ex1["Desc 71"] == "?", "Value of var Desc 71 should be missing!")
    self.assert_(ex2["Desc 138"] == "?", "Value of var Desc 138 should be missing!")
    self.assert_(
        round(BayesM(ex1), 6) == round(P1Miss, 6),
        "Imputation on loaded model is not correct")
    self.assert_(
        round(BayesM(ex2), 6) == round(P2Miss, 6),
        "Imputation on loaded model is not correct")
    # Remove the scratch directory
    os.system("/bin/rm -rf " + scratchdir)
def no_test_DFV(self):  # NOT implemented for Bayes
    """Test the Decision Function Value return."""
    CvBayes = AZorngCvBayes.CvBayesLearner(self.LdataTrain)
    # Testing with return of the DFV
    RDFV = True
    for ex in self.LdataTest:
        predictedClass = CvBayes(ex)
        a = CvBayes(ex, returnDFV=RDFV)
        b = CvBayes(ex, resultType=orange.GetProbabilities, returnDFV=RDFV)
        c = CvBayes(ex, resultType=orange.GetBoth, returnDFV=RDFV)
        # All must return tuples
        self.assert_(type(a) == type(b) == type(c) == tuple)
        # The second element of the tuple must be the DFV
        self.assert_(type(a[1]) == type(b[1]) == type(c[1]) == float)
        self.assert_(a[1] == b[1] == c[1])
        # Check if the class can always be predicted based on the DFV.
        # Positive values always correspond to the first element of the class
        # variable and negative values to the second element.
        if a[1] > 0:
            guessedClass = ex.domain.classVar[0]
        else:
            guessedClass = ex.domain.classVar[1]
        self.assertEqual(predictedClass, guessedClass)
        # Asking for GetValue
        self.assert_(type(a[0]) == orange.Value)
        # Asking for GetProbabilities
        self.assert_(type(b[0]) == orange.DiscDistribution)
        # Asking for GetBoth...
        self.assert_(type(c[0]) == tuple)
        # ... where the first element is the orange value...
        self.assert_(type(c[0][0]) == orange.Value)
        # ... and the second element is the distribution (the so-called probabilities)
        self.assert_(type(c[0][1]) == orange.DiscDistribution)
        # CvBayes always returns real probabilities on binary classification
        self.assertEqual(CvBayes.isRealProb(), True)
    expectedExtremes = {'max': 0.5, 'min': -0.5}
    self.assertEqual(
        [round(x, 5) for x in CvBayes.getDFVExtremes().values()],
        [round(x, 5) for x in expectedExtremes.values()])
    self.assertEqual(CvBayes.nPredictions, 4 * len(self.LdataTest))
    # Testing without return of the DFV
    RDFV = False
    for ex in self.LdataTest:
        a = CvBayes(ex, returnDFV=RDFV)
        b = CvBayes(ex, resultType=orange.GetProbabilities, returnDFV=RDFV)
        c = CvBayes(ex, resultType=orange.GetBoth, returnDFV=RDFV)
        # Asking for GetValue
        self.assert_(type(a) == orange.Value)
        # Asking for GetProbabilities
        self.assert_(type(b) == orange.DiscDistribution)
        # Asking for GetBoth...
        self.assert_(type(c) == tuple)
        # ... where the first element is the orange value...
        self.assert_(type(c[0]) == orange.Value)
        # ... and the second element is the distribution (the so-called probabilities)
        self.assert_(type(c[1]) == orange.DiscDistribution)
        # CvBayes always returns real probabilities on binary classification
        self.assertEqual(CvBayes.isRealProb(), True)
    self.assertEqual(
        [round(x, 5) for x in CvBayes.getDFVExtremes().values()],
        [round(x, 5) for x in expectedExtremes.values()])
    self.assertEqual(CvBayes.nPredictions, (3 + 4) * len(self.LdataTest))
def modelRead(modelFile=None, verbose=0, retrunClassifier=True):
    """Get the type of model saved in 'modelPath' and load the respective model.
    Returns the classifier saved in the respective model path.
    If called without parameters, it returns a list of known classifier types.
    It can return the classifier, or just a string with the type.
    modelRead(modelFile [, verbose = 0] [, retrunClassifier = True])"""
    if not modelFile:
        return ("SignSVM", "CvSVM", "CvANN", "PLS", "CvRF", "CvBoost", "CvBayes", "Consensus")

    modelType = None
    loadedModel = None
    if os.path.isfile(os.path.join(modelFile, "model.svm")):
        modelType = "CvSVM"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvSVM
        loadedModel = AZorngCvSVM.CvSVMread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "model.SignSvm")):
        modelType = "SignSVM"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngSignSVM
        loadedModel = AZorngSignSVM.SignSVMread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.ann")):
        modelType = "CvANN"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvANN
        loadedModel = AZorngCvANN.CvANNread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "Model.pls")):
        modelType = "PLS"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngPLS
        loadedModel = AZorngPLS.PLSread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.rf")):
        modelType = "RF"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngRF
        loadedModel = AZorngRF.RFread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "C0.model")):
        modelType = "Consensus"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngConsensus
        loadedModel = AZorngConsensus.Consensusread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.boost")):
        modelType = "CvBoost"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvBoost
        loadedModel = AZorngCvBoost.CvBoostread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.bayes")):
        modelType = "CvBayes"
        if not retrunClassifier:
            return modelType
        from trainingMethods import AZorngCvBayes
        loadedModel = AZorngCvBayes.CvBayesread(modelFile, verbose)
    else:
        # Assume an RF model in the old format, for backward compatibility
        try:
            if os.path.isdir(modelFile):
                modelType = "RF"
                if not retrunClassifier:
                    return modelType
                from trainingMethods import AZorngRF
                loadedModel = AZorngRF.RFread(modelFile, verbose)
            else:
                modelType = None
                loadedModel = None
        except:
            modelType = None
            loadedModel = None
    return loadedModel
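# A minimal usage sketch for modelRead, grounded in its docstring and branches
# above ("some/model/dir" is a hypothetical path to a saved model directory):
#
#   knownTypes = modelRead()                                      # tuple of known classifier types
#   mType = modelRead("some/model/dir", retrunClassifier=False)   # just the type string
#   classifier = modelRead("some/model/dir")                      # the loaded classifier, or None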