def test_MetaDataHandleForSavingModel(self):
    """Test the handling of SaveModel for Data with Meta Attributes"""
    # Reference accuracies (version-dependent artifacts).
    expectedAccWMeta = 1.0  # VEr 0.3
    expectedAccNoMetaValues = [
        0.56666666700000001,  # Ver 0.3
        0.563636364
    ]
    # The training set must carry meta attributes for this test to be meaningful.
    self.assert_(
        len(self.WMetaTest.domain.getmetas()) >= 1,
        "The dataset WMetaTest should have Meta Attributes")
    RFlearner = AZorngRF.RFLearner(
        NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100",
        forestAcc="0.1", termCrit="0")
    rfM = RFlearner(self.WMetaTest)
    AccNoMetaBefore = evalUtilities.getClassificationAccuracy(
        self.NoMetaTrain, rfM)
    AccWMetaBefore = evalUtilities.getClassificationAccuracy(
        self.WMetaTest, rfM)

    # Persist the model to a scratch area.
    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    modelPath = os.path.join(scratchdir, "RFModel.RF")
    rfM.write(modelPath)

    # Reload: meta attributes must not survive the round trip.
    rfR = AZorngRF.RFread(modelPath)
    self.assert_(
        len(rfR.domain.getmetas()) == 0,
        "There shouldn't be any Meta data now!")

    # Accuracy must be unchanged by the save/load round trip.
    AccNoMetaAfter = evalUtilities.getClassificationAccuracy(
        self.NoMetaTrain, rfR)
    AccWMetaAfter = evalUtilities.getClassificationAccuracy(
        self.WMetaTest, rfR)
    self.assertEqual(
        AccNoMetaBefore, AccNoMetaAfter,
        "NoMeta: Predictions after loading saved model were different")
    self.assertEqual(
        AccWMetaBefore, AccWMetaAfter,
        "WMeta: Predictions after loading saved model were different")
    self.assertEqual(round(AccWMetaAfter, 9), round(expectedAccWMeta, 9))
    self.assertRoundedToExpectedArray(AccNoMetaAfter,
                                      expectedAccNoMetaValues, 9)

    # Remove the scratch directory
    os.system("/bin/rm -rf " + scratchdir)
def test_save_load_Regression_D_Attr(self):
    """ Test Save/Load Regression model with Discrete Attribute"""
    # Build a selector that drops the second attribute (idx=1) from the
    # training domain, then train on the reduced data.
    selector = range(len(self.RegDAttr.domain))
    selector.pop(1)
    data = self.RegDAttr.select(selector)
    RFsign = AZorngRF.RFLearner(data, nTrees=200, nActVars=155, maxDepth=100)

    # Predictions from the freshly trained model over the FULL dataset.
    res1 = [str(RFsign(ex)) for ex in self.RegDAttr]

    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    modelPath = os.path.join(scratchdir, "RFModel")
    RFsign.write(modelPath)
    loadedRFmodel = AZorngRF.RFread(modelPath)

    # Predictions from the reloaded model must match exactly.
    res2 = [str(loadedRFmodel(ex)) for ex in self.RegDAttr]
    self.assertEqual(res1, res2)
    # Pin the exact expected predictions.
    self.assertEqual(res1, [
        '5.404782', '2.568249', '2.979486', '4.287185', '5.335753',
        '4.439877', '3.682451', '8.054751', '6.511803', '5.760388',
        '7.771009', '2.328262', '6.062288', '5.577081', '3.639579',
        '6.862591', '3.793468', '2.865258', '3.531777', '6.833398',
        '6.376686', '3.338588', '7.002612', '7.137580', '7.258987',
        '6.899173', '7.547265', '8.708020', '6.262212', '7.563741',
        '8.166364', '6.614120', '7.865033', '9.060866', '8.057292',
        '4.877943', '7.993115', '9.198319', '9.428467', '8.537990',
        '9.130789', '6.328936', '8.247712', '7.605743', '8.755456',
        '6.983065', '7.712387', '9.972745', '9.763152', '7.934700',
        '8.447981', '7.272462', '8.824869', '7.654151', '7.795481',
        '7.229007', '8.680950', '9.439033', '9.130064', '8.505672',
        '8.082146', '6.086042', '7.493593', '8.981513', '8.880632',
        '6.548739'
    ])
    # Remove the scratch directory
    os.system("/bin/rm -rf " + scratchdir)
def test_Priors(self):
    """Test to assure that priors are set correcly."""
    # Create a RF model with explicit class priors.
    RFlearner = AZorngRF.RFLearner(
        NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100", forestAcc="0.1",
        termCrit="0",
        priors={"Iris-versicolor": 0.35, "Iris-virginica": 0.13,
                "Iris-setosa": 0.52})
    RFmodel = RFlearner(self.irisData)
    # Calculate classification accuracy
    Acc = evalUtilities.getClassificationAccuracy(self.irisData, RFmodel)
    # Save the model
    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    modelPath = os.path.join(scratchdir, "modelPriors.RF")
    RFmodel.write(modelPath)
    # Read in the model
    newRFmodel = AZorngRF.RFread(modelPath)
    # Accuracy must be identical after the save/load round trip.
    savedAcc = evalUtilities.getClassificationAccuracy(self.irisData,
                                                       newRFmodel)
    self.assertEqual(Acc, savedAcc)
    # Check the priors saved in the model file.
    # FIX: renamed `file` -> `modelFile` (shadowed the builtin) and close
    # the handle even if readlines() raises.
    modelFile = open(os.path.join(modelPath, "model.rf"), "r")
    try:
        lines = modelFile.readlines()
    finally:
        modelFile.close()
    # NOTE(review): eval() on file content is acceptable here only because
    # the file was just written by our own code; never do this on untrusted
    # input. Priors are assumed to span lines 23-24 of the model file
    # (0-based 22/23) -- confirm if the model format changes.
    priors = [
        round(x, 2) for x in
        eval((lines[22].strip() + lines[23].strip()).replace("data:", ""))
    ]
    self.assertEqual(len(priors), 3)
    classValues = self.irisData.domain.classVar.values
    self.assertEqual(priors[classValues.index("Iris-setosa")], 0.52)
    self.assertEqual(priors[classValues.index("Iris-versicolor")], 0.35)
    self.assertEqual(priors[classValues.index("Iris-virginica")], 0.13)
    # Remove the scratch directory
    os.system("/bin/rm -rf " + scratchdir)
def test_CreateLogicalExpressionConsensusLearner(self): """ Test creation of logical expression consensus learner """ # Arrange # Construct expression learner/classifier learners = { 'firstLearner': AZorngCvSVM.CvSVMLearner(), 'secondLearner': AZorngCvANN.CvANNLearner(), 'thirdLearner': AZorngRF.RFLearner() } discreteExpression = [ "firstLearner == Iris-setosa -> Iris-setosa", "-> Iris-virginica" ] discreteLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=discreteExpression) discreteClassifier = discreteLearner(self.irisData) verifiedLearner = AZorngCvSVM.CvSVMLearner() verifiedClassifier = verifiedLearner(self.irisData) # Act result = [] verifiedResult = [] for ex in self.irisData: result.append(discreteClassifier(ex)) verifiedResult.append(verifiedClassifier(ex)) # Assert for index, item in enumerate(result): if not result[index].value == verifiedResult[index].value: print "Not equal on index: ", index self.assertEqual(result[index].value, verifiedResult[index].value)
def getRFAcc(train, work): model = AZorngRF.RFLearner(train) TP = 0 TN = 0 FP = 0 FN = 0 for ex in work: pred = model(ex).value actual = ex.get_class().value if actual == "POS": if pred == "POS": TP = TP + 1 else: FN = FN + 1 elif actual == "NEG": if pred == "NEG": TN = TN + 1 else: FP = FP + 1 print "TP\tTN\tFP\tFN\n" print str(TP) + "\t" + str(TN) + "\t" + str(FP) + "\t" + str(FN) + "\n" fid = open("RFresults.txt", "a") fid.write( str(TP) + "\t" + str(TN) + "\t" + str(FP) + "\t" + str(FN) + "\n") fid.close()
def test_CreateModelWithLearnerDictionary(self):
    """ Test the creation of Consensus Model using dictionary of learners """
    # Arrange
    learners = {'a': AZorngCvSVM.CvSVMLearner(),
                'b': AZorngCvANN.CvANNLearner(),
                'c': AZorngRF.RFLearner()}
    expression = "a + b + c"

    # Act
    learner = AZorngConsensus.ConsensusLearner(learners=learners,
                                               expression=expression)

    # Assert: every supplied learner is stored and defaults are untouched.
    for key in learner.learners:
        self.assertEqual(learner.learners[key], learners[key])
    self.assertEqual(learner.expression, expression)
    self.assertEqual(learner.name, "Consensus learner")
    self.assertEqual(learner.verbose, 0)
    self.assertEqual(learner.imputeData, None)
    self.assertEqual(learner.NTrainEx, 0)
    self.assertEqual(learner.basicStat, None)
    self.assertEqual(learner.weights, None)
def getAccStat(rankSumTuple, nDesc, train, randTest, extTest, resultsFid, projectName): print "Select features based on top ranked features" attrList = [] for elem in rankSumTuple: if len(attrList) < nDesc: attrList.append(elem[0]) train = dataUtilities.attributeSelectionData(train, attrList) train = dataUtilities.attributeDeselectionData( train, ['HLM_XEN025;Mean;CLint (uL/min/mg);(Num)']) print train.domain.attributes, len( train.domain.attributes), train.domain.classVar # Get accuracies learners = [AZorngRF.RFLearner(nTrees=100)] print "CV accuracy" MCC_CV = printCV(train, learners, resultsFid, projectName) Model = learners[0](train) print "Random Test set accuracy" MCC_rand = printTestSetAcc(Model, randTest, learners, resultsFid, projectName, True) print "External Test set accuracy" MCC_ext = printTestSetAcc(Model, extTest, learners, resultsFid, projectName, False) return MCC_CV, MCC_rand, MCC_ext
def test_Probabilities(self):
    """Test if the returned probabilities are not fake"""
    RF = AZorngRF.RFLearner(self.trainData, nTrees=200, nActVars=155,
                            maxDepth=100)
    res = []
    for idx, ex in enumerate(self.testData):
        probs = RF(ex, resultType=orange.GetProbabilities)
        res.append(probs)
        # Each prediction must be a genuine probability vector.
        self.assertEqual(
            RF.isRealProb(), True,
            "Example " + str(idx) + " did not return real probability")
        self.assert_(
            0 <= probs[0] <= 1,
            "Example " + str(idx) + " have impossible probability:" +
            str(res[-1]))
        self.assert_(
            0 <= probs[1] <= 1,
            "Example " + str(idx) + " have impossible probability:" +
            str(res[-1]))
        #print "Res",idx,":",res[-1]
        #print "Sum",idx,":",round(sum(res[-1]),5)
        self.assertEqual(
            round(sum(res[-1]), 5), 1,
            "Probabilities of Example " + str(idx) + " did not sum 1")
    # Column sums: non-integer sums indicate the classifier is not simply
    # emitting hard 0/1 votes.
    sum0 = sum([x[0] for x in res])
    sum1 = sum([x[1] for x in res])
    self.assertEqual(len(self.testData), round(sum0 + sum1, 5))
    self.assert_(sum0 - int(sum0) > 0)
    self.assert_(sum1 - int(sum1) > 0)
def test_CreateDefaultClassifierUsingTrainingData(self):
    """ Test the creation of default Classifier by calling learner with training data. """
    # Arrange
    learners = [AZorngCvSVM.CvSVMLearner(),
                AZorngCvANN.CvANNLearner(),
                AZorngRF.RFLearner()]
    trainingData = self.getRegressionTrainingData()
    learner = AZorngConsensus.ConsensusLearner(learners=learners)

    # Act
    classifier = learner(trainingData)

    # Assert: classifier was built and its attributes reflect the training.
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(learners))
    self.assertEqual(classifier.expression, None)
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertEqual(classifier.verbose, 0)
    self.assertNotEqual(classifier.imputeData, None)
    self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertEqual(classifier.weights, None)
def test_GlobalVarImportance(self):
    """Test the Global Variable Importance results"""
    RF = AZorngRF.RFLearner(self.NoMetaTrain, getVarVariance=True)
    # FIX: renamed the accumulator from `sum` to `total` -- it shadowed the
    # builtin sum(). Importances over all attributes must add up to 1.
    total = 0
    for var in RF.domain.attributes:
        total += RF.varImportance[var.name]
    self.assertEqual(round(total, 6), 1.0)
def test_CanPersistClassificationModelProbabilities(self): """Test the save/load for a classification model - Using probabilities average""" # Arrange learners = [AZorngRF.RFLearner(), AZorngCvANN.CvANNLearner()] learner = AZorngConsensus.ConsensusLearner(learners=learners) classifier = learner(self.irisData) # Act predictions = [] for ex in self.irisData: predictions.append(classifier(ex)) scratchdir = miscUtilities.createScratchDir( desc="ConsensusSaveLoadTest") print scratchdir classifier.write(os.path.join(scratchdir, "./CM.model")) # Assert predictionsL = [] Loaded = AZorngConsensus.Consensusread( os.path.join(scratchdir, "./CM.model")) for ex in self.irisData: predictionsL.append(Loaded(ex)) self.assertEqual(predictions, predictionsL) self.assertEqual(len(Loaded.domain), len(self.irisData.domain)) self.assertEqual(len(Loaded.imputeData), len(Loaded.domain)) self.assertEqual(len(Loaded.basicStat), len(Loaded.domain)) self.assertEqual(Loaded.NTrainEx, len(self.irisData)) miscUtilities.removeDir(scratchdir)
def test_VarCtrlVal(self):
    """Test of Variable Control Validation"""
    data = dataUtilities.DataTable(
        os.path.join(AZOC.AZORANGEHOME,
                     "tests/source/data/iris_W_dataOrigin.tab"))
    learners = [AZorngRF.RFLearner()]
    pTrain = 60 / 100.  # Percentage od data to be used in TrainSet
    rep = 10  # Number of repetitions
    # Proportion test restricted to examples whose "Data Origin" is SRC1.
    res = evalUtilities.proportionTest(learners, data, pTrain, times=rep,
                                       testAttrFilter="Data Origin",
                                       testFilterVal=["SRC1"])
    self.assert_(len(res.results) == 140)
    self.assert_(
        evalUtilities.ConfMat(res) == [[[100.0, 0.0], [0.0, 40.0]]])
    self.assert_(evalUtilities.CA(res)[0] == 1.0)
    # Cross-validation with the same filtering.
    res = evalUtilities.crossValidation(learners, data, rep,
                                        testAttrFilter="Data Origin",
                                        testFilterVal=["SRC1"])
    self.assert_(len(res.results) == 36)
    self.assert_(
        evalUtilities.ConfMat(res) == [[[26.0, 0.0], [0.0, 10.0]]])
    self.assert_(evalUtilities.CA(res)[0] == 1.0)
def probPred(idx, extTrain, SVMparam):
    """ Use the RF prediction probability to set the non-conf score """
    extTrain = dataUtilities.attributeDeselectionData(extTrain, ["SMILES_1"])
    # Leave-one-out: train on everything except example idx.
    idxList = range(0, idx) + range(idx + 1, len(extTrain))
    train = extTrain.get_items(idxList)
    model = AZorngRF.RFLearner(train)
    #model, SVMparam = trainSVMOptParam(train, SVMparam)
    # Predict the held-out example; returnDFV also yields the decision value.
    predList = model(extTrain[idx], returnDFV=True)
    pred = predList[0].value
    prob = predList[1]
    actual = extTrain[idx].get_class().value
    #print pred, actual, prob
    # More non conforming if prediction is different from actual label
    if pred != actual:
        alpha = 1.0 + abs(prob)
    else:
        alpha = 1.0 - abs(prob)
    #print alpha
    return alpha, SVMparam
def test_CreateDefaultClassifierUsingPreTrainedRegressionClassifiers(self):
    """ Test the creation of custom Consensus Classifier using pre-trained regression classifiers. """
    # Arrange: train each learner separately, then hand the classifiers over.
    learners = {'a': AZorngCvSVM.CvSVMLearner(),
                'b': AZorngCvANN.CvANNLearner(),
                'c': AZorngRF.RFLearner()}
    classifiers = {}
    for key, lrn in learners.items():
        classifiers[key] = lrn(self.getRegressionTrainingData())
    expression = "a + b + c"

    # Act
    classifier = AZorngConsensus.ConsensusClassifier(
        classifiers=classifiers, expression=expression)

    # Assert
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(learners))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertNotEqual(classifier.classVar, None)
    self.assertNotEqual(classifier.domain, None)
    self.assertEqual(classifier.expression, expression)
    self.assertNotEqual(classifier.imputeData, None)
    #self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertNotEqual(classifier.varNames, None)
    self.assertEqual(classifier.verbose, 0)
    self.assertEqual(classifier.weights, None)
def test_CreateDefaultClassifierUsingPreTrainedRegressionClassifiers(self):
    """ Test the creation of default Consensus Classifier using pre-trained classification classifiers. """
    # NOTE(review): this method has the same name as an earlier test in this
    # file, so it shadows that definition and only this one runs under
    # unittest. Consider renaming one of them -- name kept here to preserve
    # current behaviour.
    # Arrange
    learners = [AZorngCvSVM.CvSVMLearner(),
                AZorngCvANN.CvANNLearner(),
                AZorngRF.RFLearner()]
    classifiers = [l(self.getRegressionTrainingData()) for l in learners]

    # Act
    classifier = AZorngConsensus.ConsensusClassifier(
        classifiers=classifiers)

    # Assert
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(learners))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertNotEqual(classifier.classVar, None)
    self.assertNotEqual(classifier.domain, None)
    self.assertEqual(classifier.expression, None)
    self.assertNotEqual(classifier.imputeData, None)
    #self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertNotEqual(classifier.varNames, None)
    self.assertEqual(classifier.verbose, 0)
    self.assertEqual(classifier.weights, None)
def test_CreateCustomClassificationClassifierUsingTrainingData(self):
    """ Test the creation of custom classification Classifier by calling learner with training data. """
    # Arrange
    learners = {'a': AZorngCvSVM.CvSVMLearner(),
                'b': AZorngCvANN.CvANNLearner(),
                'c': AZorngRF.RFLearner()}
    # NOTE(review): the expression refers to "firstLearner" while the
    # learners are keyed 'a'/'b'/'c' -- presumably the test only exercises
    # construction, not evaluation; confirm against ConsensusLearner.
    expression = ["firstLearner == Iris-setosa -> Iris-setosa",
                  "-> Iris-virginica"]
    trainingData = self.getClassificationTrainingData()
    learner = AZorngConsensus.ConsensusLearner(learners=learners,
                                               expression=expression)

    # Act
    classifier = learner(trainingData)

    # Assert
    self.assertNotEqual(classifier, None)
    self.assertEqual(len(classifier.classifiers), len(learners))
    self.assertNotEqual(classifier.basicStat, None)
    self.assertNotEqual(classifier.classVar, None)
    self.assertNotEqual(classifier.domain, None)
    self.assertEqual(classifier.expression, expression)
    self.assertNotEqual(classifier.imputeData, None)
    self.assertEqual(classifier.NTrainEx, len(trainingData))
    self.assertEqual(classifier.name, "Consensus classifier")
    self.assertNotEqual(classifier.varNames, None)
    self.assertEqual(classifier.verbose, 0)
    self.assertEqual(classifier.weights, None)
def test_PredictionWithDiffVarType(self):
    """Test prediction with diff. VarType

    Test the prediction of examples with different varType
    """
    expectedAccValues = [
        0.96296296296296291,  # Ver 0.3
        1.0
    ]
    # Create a rf model
    RFlearner = AZorngRF.RFLearner(
        NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100", forestAcc="0.1",
        termCrit="0")
    rf = RFlearner(self.noBadDataTrain)
    # using from index 3 o the end of data, because we know that from 0 to 2
    # the examples are not compatible
    Acc2 = evalUtilities.getClassificationAccuracy(self.noBadDataTest[3:],
                                                   rf)
    Acc1 = evalUtilities.getClassificationAccuracy(self.badVarTypeData[3:],
                                                   rf)
    self.assertRoundedToExpectedArray(Acc1, expectedAccValues, 9)
    self.assertRoundedToExpectedArray(Acc2, expectedAccValues, 9)
    self.assert_(
        ('Fixed Types of variables' in rf.examplesFixedLog) and
        (rf.examplesFixedLog['Fixed Types of variables'] == 27),
        "No report of fixing in classifier class")
    # BUG FIX: the failure message used to sit INSIDE the comparison's
    # parentheses, making the second operand of `and` a 2-tuple -- always
    # truthy, so the assertion could never fail. The message is now the
    # assertion's msg argument.
    self.assert_(
        ('Vars needing type fix' in rf.examplesFixedLog) and
        (rf.examplesFixedLog['Vars needing type fix']['[Br]([C])'] ==
         "EnumVariable to FloatVariable"),
        "No report of fixing in classifier class")
def getRFprobAcc(train, work, probThres): model = AZorngRF.RFLearner(train) TP = 0 TN = 0 FP = 0 FN = 0 noPred = 0 for ex in work: actual = ex.get_class().value predList = model(ex, returnDFV = True) pred = predList[0].value prob = predList[1] if abs(prob) > probThres: if actual == "A": if pred == "A": TP = TP + 1 else: FN = FN + 1 elif actual == "N": if pred == "N": TN = TN + 1 else: FP = FP + 1 else: noPred = noPred + 1 print "TP\tTN\tFP\tFN\tnoPred\n" print str(TP)+"\t"+str(TN)+"\t"+str(FP)+"\t"+str(FN)+"\t"+str(noPred)+"\n" fid = open("RFprob"+str(probThres)+"Results.txt", "a") fid.write(str(TP)+"\t"+str(TN)+"\t"+str(FP)+"\t"+str(FN)+"\t"+str(noPred)+"\n") fid.close()
def testgetRMSE(self):
    """getRMSE must reproduce the reference RMSE on the regression set."""
    data = dataUtilities.DataTable(self.regDataPath)
    RFlearner = AZorngRF.RFLearner()
    # Second half of the data is held out for evaluation.
    testData = data[int(len(data) / 2) + 1:]
    # FIX: removed the unused `trainData` local -- the classifier is (as
    # before) trained on the FULL dataset, which the dead variable obscured.
    # NOTE(review): training on `data` while testing on its second half may
    # be intentional here (the expected RMSE was derived this way) -- confirm
    # before "fixing" it to train on the first half only.
    classifier = RFlearner(data)
    RMSE = evalUtilities.getRMSE(testData, classifier)
    self.assert_(RMSE - 2.07396535555 < 0.05, "Got:" + str(RMSE))
def test_CustomLogicalExpressionUsingOrAndStatement(self): """ Test logical expression using OR/AND statements """ # Arrange # Construct verification learners a = AZorngCvSVM.CvSVMLearner() a = a(self.irisData) b = AZorngCvANN.CvANNLearner() b = b(self.irisData) c = AZorngRF.RFLearner() c = c(self.irisData) # Construct expression learner/classifier learners = { 'a': AZorngCvSVM.CvSVMLearner(), 'b': AZorngCvANN.CvANNLearner(), 'c': AZorngRF.RFLearner() } discreteExpression = [ "a == Iris-setosa and c == Iris-virginica or b == Iris-setosa -> Iris-setosa", "-> Iris-virginica" ] discreteLearner = AZorngConsensus.ConsensusLearner( learners=learners, expression=discreteExpression) discreteClassifier = discreteLearner(self.irisData) # Act result = [] for ex in self.irisData: result.append(discreteClassifier(ex)) verifiedResult = [] for ex in self.irisData: if a(ex).value == "Iris-setosa" and c( ex).value == "Iris-virginica" or b( ex).value == "Iris-setosa": verifiedResult.append("Iris-setosa") else: verifiedResult.append("Iris-virginica") # Assert for index, item in enumerate(result): if not result[index].value == verifiedResult[index]: print "Not equal on index: ", index, " Predicted: ", result[ index].value, " Real: ", verifiedResult[index] self.assertEqual(result[index].value, verifiedResult[index])
def testgetRsqrt(self):
    """getRsqrt must reproduce the reference R^2 on the regression set."""
    data = dataUtilities.DataTable(self.regDataPath)
    RFlearner = AZorngRF.RFLearner()
    # Second half of the data is held out for evaluation.
    testData = data[int(len(data) / 2) + 1:]
    # FIX: removed the unused `trainData` local -- the classifier is (as
    # before) trained on the FULL dataset, which the dead variable obscured.
    # NOTE(review): same train-on-all/test-on-half pattern as testgetRMSE;
    # the expected value was derived this way -- confirm before changing.
    classifier = RFlearner(data)
    Rsqrt = evalUtilities.getRsqrt(testData, classifier)
    self.assert_(Rsqrt - 0.684011336894 < 0.05, "Got:" + str(Rsqrt))
def test_RFRegression(self): """RF - Test of optimizer with continuous class data """ #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngRF.RFLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet = self.contTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = True # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.R2" evalM = "AZutilities.evalUtilities.RMSE" # Create an interface for setting optimizer parameters pars = AZLearnersParamsConfig.API("RFLearner") # Set the parameters in parameterList to be optimized pars.setParameter("NumThreads", "optimize", False) # Change the default pars.setParameter("NumThreads", "default", "1") # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ useParameters = pars.getParametersDict(),\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI, False) self.assertEqual(learner.optimized, True) self.assertEqual(round(tunedPars[0], 2), round(3.1499999999999999, 2)) #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.contTrain) RMSE = evalUtilities.getRMSE(self.contTest, classifier) self.assertEqual(round(RMSE, 2), round(2.02, 2)) #Ver 0.3 #Check if the best result was not the one with numThreads different of 1 since that way we can get #different results among runs self.assertEqual(int(tunedPars[1]["NumThreads"]), 1) miscUtilities.removeDir(runPath)
def LLOOprob(idx, extTrain, measure=None):
    """ Use the fraction of kNN correctly predicted by a local model
        Hard coded to 20 NN. Modeling method. RF of Tree?
    """
    # Distances from example idx to every other example.
    if not measure:
        measure = orange.ExamplesDistanceConstructor_Euclidean(extTrain)
    distList = []
    for runIdx in range(len(extTrain)):
        if runIdx != idx:
            distList.append(measure(extTrain[idx], extTrain[runIdx]))
    distList.sort()
    # NOTE(review): despite the docstring's "20 NN", the threshold is the
    # 51st smallest distance -- confirm which is intended.
    thresDist = distList[
        50]  # Smaller number of NN does not work with returnDFV
    # Collect the neighbourhood (this includes example idx itself).
    kNN = []
    for runIdx in range(len(extTrain)):
        if measure(extTrain[idx], extTrain[runIdx]) <= thresDist:
            kNN.append(extTrain[runIdx])
    kNNtrain = dataUtilities.DataTable(kNN)
    # LOO over the neighbourhood: alpha grows with model non-conformity.
    alphaList = []
    for iidx in range(len(kNNtrain)):
        # Deselect example iidx from the neighbourhood training pool.
        idxList = range(0, iidx)
        idxList.extend(range(iidx + 1, len(kNNtrain)))
        train = kNNtrain.get_items(idxList)
        # Get prediction and pred probability
        model = AZorngRF.RFLearner(train)
        predList = model(kNNtrain[iidx], returnDFV=True)
        pred = predList[0].value
        prob = predList[1]
        actual = kNNtrain[iidx].get_class().value
        # alpha should be greater the less certain the model
        try:
            if pred != actual:
                alpha = 1.0 + abs(prob)
            else:
                alpha = 1.0 - abs(prob)
            alphaList.append(alpha)
        except Exception:
            # FIX: was a bare `except:` -- keep the best-effort skip but no
            # longer swallow SystemExit/KeyboardInterrupt.
            pass
    # FIX: guard against an empty alphaList (previously ZeroDivisionError);
    # report maximal-ish non-conformity when nothing could be scored.
    if not alphaList:
        return 1.0
    return sum(alphaList) / float(len(alphaList))
def test_SavedModel(self):
    """Test to assure that a saved RF model gives the same predictions as before saving."""
    # Create a RF model
    RFlearner = AZorngRF.RFLearner(
        maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100",
        forestAcc="0.1", termCrit="0")
    RFmodel = RFlearner(self.trainData)
    # Calculate classification accuracy
    Acc = evalUtilities.getClassificationAccuracy(self.testData, RFmodel)
    # Save the model
    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    modelPath = os.path.join(scratchdir, "model.RF")
    RFmodel.write(modelPath)
    # Read in the model
    newRFmodel = AZorngRF.RFread(modelPath)
    # Accuracy must be identical after the save/load round trip.
    savedAcc = evalUtilities.getClassificationAccuracy(self.testData,
                                                       newRFmodel)
    self.assertEqual(Acc, savedAcc)
    # Check the priors saved in the model file.
    # FIX: renamed `file` -> `modelFile` (shadowed the builtin) and close
    # the handle even if readlines() raises.
    modelFile = open(os.path.join(modelPath, "model.rf"), "r")
    try:
        lines = modelFile.readlines()
    finally:
        modelFile.close()
    # NOTE(review): eval() of our own just-written model file only; never
    # reuse on untrusted input. Default priors are expected to be uniform.
    priors = [
        round(x, 2) for x in eval((lines[22].strip()).replace("data:", ""))
    ]
    self.assertEqual(len(priors), 2)
    self.assertEqual(
        priors[self.testData.domain.classVar.values.index("POS")], 0.50)
    self.assertEqual(
        priors[self.testData.domain.classVar.values.index("NEG")], 0.50)
    # Remove the scratch directory
    os.system("/bin/rm -rf " + scratchdir)
def test_AverageNRegressionExpressionUsingObjMap(self):
    """ Test regular expression using average N regression with object map """
    # Arrange: expression-based consensus averaging three regressors.
    learners = {'firstLearner': AZorngCvSVM.CvSVMLearner(),
                'secondLearner': AZorngCvANN.CvANNLearner(),
                'thirdLearner': AZorngRF.RFLearner()}
    regressionExpression = "(firstLearner + secondLearner + thirdLearner) / 3"
    expressionLearner = AZorngConsensus.ConsensusLearner(
        learners=learners, expression=regressionExpression)
    expressionClassifier = expressionLearner(self.DataReg)
    # Default consensus (plain average) built from the same learner types.
    defaultLearners = [AZorngRF.RFLearner(),
                       AZorngCvANN.CvANNLearner(),
                       AZorngCvSVM.CvSVMLearner()]
    defaultLearner = AZorngConsensus.ConsensusLearner(learners=defaultLearners)
    defaultClassifier = defaultLearner(self.DataReg)

    # Act
    expressionPredictions = [expressionClassifier(ex) for ex in self.DataReg]
    defaultPredictions = [defaultClassifier(ex) for ex in self.DataReg]

    # Assert: the explicit average matches the default consensus.
    for index in range(len(expressionPredictions)):
        self.assertEqual(
            True,
            float_compare(expressionPredictions[index],
                          defaultPredictions[index]))
def test_RF_Classification(self): """PLS - Test of optimizer with discrete class data """ expectedAcc = [ 0.57999999999999996, 0.58999999999999997, 0.612 ] #Ver 0.3 - Artifact: The second value can be expected on other Systems #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngRF.RFLearner() #dataset to use in the parameters optimization (Discrete class in this example) dataSet = self.discTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = False # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.CA" evalM = "AZutilities.evalUtilities.CA" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI, False) self.assertEqual(learner.optimized, True) self.assert_( round(tunedPars[0], 2) in [round(x, 2) for x in expectedAcc]) #Ver 0.3 #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.discTrain) CA = evalUtilities.getClassificationAccuracy(self.discTest, classifier) expectedCA = [0.9655 ] # Artifact: Second value expected in UBUNTU 10.10 self.assert_(round(CA, 2) in [round(ca, 2) for ca in expectedCA]) # Ver 0.3 miscUtilities.removeDir(runPath)
def TopVarImportanceTest(data, expectNone=False):
    """Check getTopImportantVars consistency across a save/load round trip.

    Returns True when the reloaded model reports the same top variable per
    example as the in-memory model (and, unless expectNone, the results are
    non-None and not all identical)."""
    RF = AZorngRF.RFLearner(data)
    resA = [RF.getTopImportantVars(ex, 1) for ex in data]

    scratchdir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
    modelPath = os.path.join(scratchdir, "CvRFModel")
    RF.write(modelPath)
    LoadedRF = AZorngRF.RFread(modelPath)
    miscUtilities.removeDir(scratchdir)

    resB = [LoadedRF.getTopImportantVars(ex, 1) for ex in data]
    if expectNone:
        return resA == resB == [None] * len(data)
    return (resA == resB and None not in resA
            and resA.count(resA[0]) != len(resA))
def testRMSEstdCalc(self):
    """getRMSEstd over a 5-fold CV must reproduce the reference value."""
    data = dataUtilities.DataTable(self.regDataPath)
    learners = [AZorngRF.RFLearner()]
    nFolds = 5
    res = orngTest.crossValidation(
        learners, data,
        strat=orange.MakeRandomIndices.StratifiedIfPossible,
        folds=nFolds)
    RMSEstd = evalUtilities.getRMSEstd(res, nFolds)[0]
    self.assertEqual(round(RMSEstd, 3), round(0.141, 3))
def test_CreateLearnerWithObjectMapping(self):
    """ Test the creation of learners with an object map """
    # Arrange
    learnerMap = {'firstLearner': AZorngCvSVM.CvSVMLearner(),
                  'secondLearner': AZorngCvANN.CvANNLearner(),
                  'thirdLearner': AZorngRF.RFLearner()}

    # Act
    learner = AZorngConsensus.ConsensusLearner(learners=learnerMap)

    # Assert: all supplied learners were registered.
    self.assertEqual(len(learner.learners), len(learnerMap))
def test_RF_Regression(self): """RF - Test of optimizer with continuous class data """ expectedRes = [ 3.27, 3.2599999999999998, 3.15 ] #Ver 0.3 - Artifact: The second value can be expected on other Systems #Create the appspack instance opt = paramOptUtilities.Appspack() #Learner to be optimized learner = AZorngRF.RFLearner() #dataset to use in the parameters optimization dataSet = self.contTrainDataPath # Define the objective function. This requires: # defining the extreme to find (the min or max): findMin=True or findMin=False fMin = True # defining the method for evaluation (must be a method that accepts as input an orngTest.ExperimentResults): # evaluateMethod="AZutilities.evalUtilities.R2" evalM = "AZutilities.evalUtilities.RMSE" # Create a directory for running the appspack (if not defined it will use the present working directory) runPath = miscUtilities.createScratchDir(desc="ParamOptTest") # Run the appspack which will configure the input learner and aditionaly return #[<minimum of objective function found>, <optimized parameters>] tunedPars = opt(learner=learner,\ dataSet=dataSet,\ evaluateMethod = evalM,\ findMin=fMin,\ runPath = runPath,\ useStd = False,\ verbose = 0) print "Returned: ", tunedPars print "====================== optimization Done ===========================" print "Learner optimized flag = ", learner.optimized print "Tuned parameters = ", tunedPars[1] print "Best optimization result = ", tunedPars[0] print "check the file intRes.txt to see the intermediate results of optimizer!" 
self.assertEqual(opt.usedMPI, False) self.assertEqual(learner.optimized, True) self.assert_( round(tunedPars[0], 2) in [round(x, 2) for x in expectedRes]) #Ver 0.3 #The learner is now with its optimized parameters already set, so we can now make a classifier out of it classifier = learner(self.contTrain) RMSE = evalUtilities.getRMSE(self.contTest, classifier) expectedRes = [2.89, 2.0158] self.assert_(round(RMSE, 2) in [round(x, 2) for x in expectedRes]) #Ver 0.3 miscUtilities.removeDir(runPath)