def classifySVM(self, onImg, theSign):
    """Train an mlpy SVM for ``theSign`` and print its cross-validated error.

    ``onImg`` selects the SVM settings:

    * ``1`` -- full images (gaussian kernel)
    * ``2`` -- PCA-reduced images (gaussian kernel, same settings as ``1``)
    * ``3`` -- Gabor-filter features (polynomial kernel)

    The data returned by ``self.getDataLabels(onImg, theSign, False)`` is
    shuffled, split into 50 sets with ``mlpy.kfoldS``, and the same SVM
    object is re-fitted and scored on every split.  The per-fold output and
    the average error are printed.

    Returns the ``mlpy.Svm`` object; its last ``compute`` call was made on
    the final fold's training split.

    Raises ``ValueError`` when ``onImg`` is not 1, 2 or 3.
    """
    # Pick the kernel settings first so that an unsupported onImg fails
    # immediately with a clear message, instead of surfacing later as an
    # unbound `problem` inside the fold loop.
    if onImg in (1, 2):
        # Full images and PCA-reduced images share the same settings.
        svm_kwargs = dict(kernel='gaussian', C=1.0, kp=0.1,
                          tol=0.001, eps=0.001)
    elif onImg == 3:
        svm_kwargs = dict(kernel='polynomial', kp=0.3, C=1.0,
                          tol=0.0001, eps=0.0001)
    else:
        raise ValueError("onImg must be 1, 2 or 3, got " + repr(onImg))

    # 0) get the training data and the labels
    indexs, labels, train = self.getDataLabels(onImg, theSign, False)

    # 1) initialize the svm
    problem = mlpy.Svm(maxloops=1000, opt_offset=True, **svm_kwargs)

    # 2) shuffle the input data before splitting; `indexs` is shuffled in
    #    place and then used to reorder labels and samples consistently
    shuffle(indexs)
    labels = labels[indexs]
    train = train[indexs, :]

    # 3) define the folds, then train and test on each of them
    # NOTE(review): rseed receives a float in [0, 1) -- confirm that
    # mlpy.kfoldS does not truncate it to an int (which would always give 0).
    pred_err = 0.0
    folds = mlpy.kfoldS(cl=labels, sets=50, rseed=random.random())
    for trainI, testI in folds:
        trainSet, testSet = train[trainI], train[testI]
        trainLab, testLab = labels[trainI], labels[testI]

        # Single-argument print(...) gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        learned = problem.compute(trainSet, trainLab)
        print("it learned >>> " + str(learned))

        prediction = problem.predict(testSet)
        print(prediction)

        pred_err += mlpy.err(testLab, prediction)
        print(pred_err)

    avg_err = float(pred_err) / float(len(folds))
    print("\nAverage error over 50 folds:" + str(avg_err))
    return problem
def testAdvanceGraph3(self):
    """
    Learn a classifier from a set of ego and alter pairs, make predictions
    on those same pairs and log the results. Then test that the same
    results are present after one step of the simulation.
    """
    dataDir = PathDefaults.getDataDir() + "infoDiffusion/"
    matFileName = dataDir + "EgoAlterTransmissions1000.mat"
    examplesList = ExamplesList.readFromMatFile(matFileName)
    examplesList.setDefaultExamplesName("X")
    examplesList.setLabelsName("y")

    logging.debug("Number of y = +1: " + str(sum(examplesList.getSampledDataField("y") == 1)))
    logging.debug("Number of y = -1: " + str(sum(examplesList.getSampledDataField("y") == -1)))

    # Standardise the examples
    preprocessor = Standardiser()
    X = examplesList.getDataField(examplesList.getDefaultExamplesName())
    X = preprocessor.standardiseArray(X)
    examplesList.overwriteDataField(examplesList.getDefaultExamplesName(), X)

    # Fit on the standardised examples and predict on the same data
    classifier = MlpySVM(kernel='linear', kp=1, C=32.0)
    y = examplesList.getDataField("y")
    classifier.learnModel(X, y)
    predY = classifier.classify(X)

    # NOTE(review): these two lines log the same label counts as above;
    # possibly the counts of predY were intended -- confirm.
    logging.debug("Number of y = +1: " + str(sum(examplesList.getSampledDataField("y") == 1)))
    logging.debug("Number of y = -1: " + str(sum(examplesList.getSampledDataField("y") == -1)))

    sampledY = examplesList.getSampledDataField(examplesList.getLabelsName()).ravel()

    error = mlpy.err(sampledY, predY)
    sensitivity = mlpy.sens(sampledY, predY)
    specificity = mlpy.spec(sampledY, predY)
    errorP = mlpy.errp(sampledY, predY)
    errorN = mlpy.errn(sampledY, predY)

    logging.debug("--- Classification evaluation ---")
    logging.debug("Error on " + str(examplesList.getNumExamples()) + " examples is " + str(error))
    logging.debug("Sensitivity (recall = TP/(TP+FN)): " + str(sensitivity))
    logging.debug("Specificity (TN/TN+FP): " + str(specificity))
    logging.debug("Error on positives: " + str(errorP))
    logging.debug("Error on negatives: " + str(errorN))

    sGraph = EgoUtils.graphFromMatFile(matFileName)

    # Notice that the data is preprocessed in the same way as the survey data
    egoSimulator = EgoSimulator(sGraph, classifier, preprocessor)

    # Before advancing, the graph is expected to hold 1000 informed people.
    # assertEqual is used instead of the deprecated assertEquals alias.
    totalInfo = EgoUtils.getTotalInformation(sGraph)
    logging.debug("Total number of people with information: " + str(totalInfo))
    self.assertEqual(totalInfo, 1000)

    # After one step, every positively predicted pair should add one more
    # informed person.
    sGraph = egoSimulator.advanceGraph()
    totalInfo = EgoUtils.getTotalInformation(sGraph)
    logging.debug("Total number of people with information: " + str(totalInfo))
    self.assertEqual(totalInfo, 1000 + sum(predY == 1))

    # Presumably pair i maps to alter vertex 2*i+1 in the graph -- confirm
    # against EgoUtils.graphFromMatFile.
    altersList = egoSimulator.getAlters(0)
    predictedAlters = numpy.nonzero(predY == 1)[0]
    self.assertTrue((altersList == predictedAlters*2+1).all())