def recomputeCM(self):
    """Recompute all confusion-matrix-based scores for the current target class.

    Called when the target class changes: only the stats flagged ``cmBased``
    need recomputation, so the non-CM scores in ``l.scores`` are left alone.
    """
    # Nothing to recompute before the first evaluation run.
    if not self.results:
        return
    cm = orngStat.computeConfusionMatrices(self.results, classIndex=self.targetClass)
    # Each cm-based stat holds an expression string in s.f that is evaluated
    # with `cm` in scope; the result is one score per learner.
    # NOTE(review): eval assumes self.stat entries are trusted, locally
    # defined expressions — confirm where self.stat is populated.
    scores = [(indx, eval("orngStat." + s.f))
              for (indx, s) in enumerate(self.stat) if s.cmBased]
    for (indx, score) in scores:
        # Only learners that already have a scores list get updated; the
        # enumeration order must match the per-learner order inside `score`.
        for (i, l) in enumerate([l for l in self.learners.values() if l.scores]):
            l.scores[indx] = score[i]
    self.paintscores()
def generalCVconfMat(data, learners, nFolds = 5): """ General method for printing the X fold CV confusion matrix of an Orange data set (data) with any number of classes. learners is a list of AZorange learners. """ res = orngTest.crossValidation(learners, data, strat=orange.MakeRandomIndices.StratifiedIfPossible, folds = nFolds) classes = data.domain.classVar.values for idx in range(len(learners)): cm = orngStat.computeConfusionMatrices(res)[idx] print "Results for "+learners[idx].name print "\t"+"\t".join(classes) for className, classConfusions in zip(classes, cm): print ("%s" + ("\t%i" * len(classes))) % ((className, ) + tuple(classConfusions))
def cross_validation(self): data = self.data # set up the learners bayes = orange.BayesLearner() tree = orngTree.TreeLearner(mForPruning=2) bayes.name = "bayes" tree.name = "tree" l = orange.SVMLearner() l.name = "SVM" l=orange.SVMLearner() l.svm_type=orange.SVMLearner.Nu_SVC l.nu=0.3 l.probability=True learners = [bayes, tree, l] deepcopy # compute accuracies on data res = orngTest.crossValidation(learners, data, folds=10) cm = orngStat.computeConfusionMatrices(res, classIndex=data.domain.classVar.values.index('-1')) stat = (('CA', 'CA(res)'), ('Sens', 'sens(cm)'), ('Spec', 'spec(cm)'), ('AUC', 'AUC(res)'), ('IS', 'IS(res)'), ('Brier', 'BrierScore(res)'), ('F1', 'F1(cm)'), ('F2', 'Falpha(cm, alpha=2.0)'), ('MCC', 'MCC(cm)'), ('sPi', 'scottsPi(cm)'), ) scores = [eval("orngStat."+s[1]) for s in stat] print "Learner " + "".join(["%-7s" % s[0] for s in stat]) for (i, l) in enumerate(learners): print "%-8s " % l.name + "".join(["%5.3f " % s[i] for s in scores]) return None
def generalCVconfMat(data, learners, nFolds=5): """ General method for printing the X fold CV confusion matrix of an Orange data set (data) with any number of classes. learners is a list of AZorange learners. """ res = crossValidation( learners, data, stratified=orange.MakeRandomIndices.StratifiedIfPossible, folds=nFolds) classes = data.domain.classVar.values for idx in range(len(learners)): cm = orngStat.computeConfusionMatrices(res)[idx] print "Results for " + learners[idx].name print "\t" + "\t".join(classes) for className, classConfusions in zip(classes, cm): print("%s" + ("\t%i" * len(classes))) % ( (className, ) + tuple(classConfusions))
# Referenced: c_performance.htm
import orange
import orngTest, orngStat, orngTree

# Set up the learners to compare.
bayes = orange.BayesLearner()
bayes.name = "bayes"
tree = orngTree.TreeLearner(mForPruning=2)
tree.name = "tree"
learners = [bayes, tree]

# Estimate performance with 10-fold cross-validation on the voting data;
# the confusion matrix is taken with "democrat" as the positive class.
data = orange.ExampleTable("voting")
res = orngTest.crossValidation(learners, data, folds=10)
cm = orngStat.computeConfusionMatrices(
    res,
    classIndex=data.domain.classVar.values.index("democrat"))

# Each entry pairs a column label with a callable computing that statistic
# from the CV results (res) and/or the confusion matrices (cm).
stat = (
    ("CA", lambda res, cm: orngStat.CA(res)),
    ("Sens", lambda res, cm: orngStat.sens(cm)),
    ("Spec", lambda res, cm: orngStat.spec(cm)),
    ("AUC", lambda res, cm: orngStat.AUC(res)),
    ("IS", lambda res, cm: orngStat.IS(res)),
    ("Brier", lambda res, cm: orngStat.BrierScore(res)),
    ("F1", lambda res, cm: orngStat.F1(cm)),
    ("F2", lambda res, cm: orngStat.Falpha(cm, alpha=2.0)),
    ("MCC", lambda res, cm: orngStat.MCC(cm)),
    ("sPi", lambda res, cm: orngStat.scottsPi(cm)),
)
# One score list per statistic, each holding one value per learner.
scores = [compute(res, cm) for (label, compute) in stat]
# Category: evaluation # Uses: voting.tab # Classes: orngTest.crossValidation # Referenced: c_performance.htm import orange import orngTest, orngStat, orngTree # set up the learners bayes = orange.BayesLearner() tree = orngTree.TreeLearner(mForPruning=2) bayes.name = "bayes" tree.name = "tree" learners = [bayes, tree] # compute accuracies on data data = orange.ExampleTable("voting") res = orngTest.crossValidation(learners, data, folds=10) cm = orngStat.computeConfusionMatrices( res, classIndex=data.domain.classVar.values.index('democrat')) stat = (('CA', 'CA(res)'), ('Sens', 'sens(cm)'), ('Spec', 'spec(cm)'), ('AUC', 'AUC(res)'), ('IS', 'IS(res)'), ('Brier', 'BrierScore(res)'), ('F1', 'F1(cm)'), ('F2', 'Falpha(cm, alpha=2.0)'), ('MCC', 'MCC(cm)')) scores = [eval("orngStat." + s[1]) for s in stat] print print "Learner " + "".join(["%-7s" % s[0] for s in stat]) for (i, l) in enumerate(learners): print "%-8s " % l.name + "".join(["%5.3f " % s[i] for s in scores])
def score(self, ids):
    """compute scores for the list of learners

    For each learner id in `ids`: probe it on a small sample to see whether
    it can handle the data at all, run the selected resampling scheme
    (self.resampling: 0=CV, 1=leave-one-out, 2=proportion test,
    3=learn-and-test-on-learn-data, 4=separate test data), evaluate every
    stat in self.stat, store per-learner scores and send the results on.
    """
    # No data: clear previous results for the requested learners and bail.
    if (not self.data):
        for id in ids:
            self.learners[id].results = None
        return
    # test which learners can accept the given data set
    # e.g., regressions can't deal with classification data
    learners = []
    # Probe on a small stratified sample (2x attribute count, capped at
    # the data size) rather than the full data — cheap compatibility check.
    n = len(self.data.domain.attributes) * 2
    indices = orange.MakeRandomIndices2(
        p0=min(n, len(self.data)),
        stratified=orange.MakeRandomIndices2.StratifiedIfPossible)
    new = self.data.selectref(indices(self.data))
    # new = self.data.selectref([1]*min(n, len(self.data)) +
    #     [0]*(len(self.data) - min(n, len(self.data))))
    self.warning(0)
    for l in [self.learners[id] for id in ids]:
        learner = l.learner
        if self.preprocessor:
            learner = self.preprocessor.wrapLearner(learner)
        try:
            predictor = learner(new)
            # Accept the learner only if its prediction type matches the
            # class variable type (discrete vs continuous).
            if predictor(new[0]).varType == new.domain.classVar.varType:
                learners.append(learner)
            else:
                l.scores = []
        except Exception as ex:
            self.warning(
                0, "Learner %s ends with exception: %s" % (l.name, str(ex)))
            l.scores = []
    if not learners:
        return
    # computation of results (res, and cm if classification)
    pb = None
    if self.resampling == 0:
        # k-fold cross-validation
        pb = OWGUI.ProgressBar(self, iterations=self.nFolds)
        res = orngTest.crossValidation(
            learners, self.data, folds=self.nFolds,
            strat=orange.MakeRandomIndices.StratifiedIfPossible,
            callback=pb.advance, storeExamples=True)
        pb.finish()
    elif self.resampling == 1:
        # leave-one-out
        pb = OWGUI.ProgressBar(self, iterations=len(self.data))
        res = orngTest.leaveOneOut(learners, self.data,
                                   callback=pb.advance, storeExamples=True)
        pb.finish()
    elif self.resampling == 2:
        # repeated random train/test split (pLearning% for training)
        pb = OWGUI.ProgressBar(self, iterations=self.pRepeat)
        res = orngTest.proportionTest(learners, self.data,
                                      self.pLearning / 100.,
                                      times=self.pRepeat,
                                      callback=pb.advance,
                                      storeExamples=True)
        pb.finish()
    elif self.resampling == 3:
        # train and test on the same data (optimistic estimate)
        pb = OWGUI.ProgressBar(self, iterations=len(learners))
        res = orngTest.learnAndTestOnLearnData(learners, self.data,
                                               storeExamples=True,
                                               callback=pb.advance)
        pb.finish()
    elif self.resampling == 4:
        # separate test data; without it, clear all scores and stop
        if not self.testdata:
            for l in self.learners.values():
                l.scores = []
            return
        pb = OWGUI.ProgressBar(self, iterations=len(learners))
        res = orngTest.learnAndTestOnTestData(learners, self.data,
                                              self.testdata,
                                              storeExamples=True,
                                              callback=pb.advance)
        pb.finish()
    if self.isclassification():
        # cm is read via eval of self.stat expressions below
        cm = orngStat.computeConfusionMatrices(res,
                                               classIndex=self.targetClass)
    if self.preprocessor:
        # Unwrap learners
        learners = [l.wrappedLearner for l in learners]
    res.learners = learners
    for l in [self.learners[id] for id in ids]:
        if l.learner in learners:
            l.results = res
    # Clear any stale per-stat error messages before re-evaluating.
    self.error(list(range(len(self.stat))))
    scores = []
    for i, s in enumerate(self.stat):
        try:
            # s.f is an expression string evaluated with res/cm in scope.
            scores.append(eval("orngStat." + s.f))
        except Exception as ex:
            self.error(i, "An error occurred while evaluating orngStat." +
                       s.f + "on %s due to %s" %
                       (" ".join([l.name for l in learners]), ex))
            # Placeholder so score indices stay aligned with self.stat.
            scores.append([None] * len(self.learners))
    for (i, l) in enumerate(learners):
        self.learners[l.id].scores = [s[i] if s else None for s in scores]
    self.sendResults()
# Build ensemble learners around the (previously defined) tree learner.
# NOTE(review): knn, bayes, tree, forest, data, kFolds and target are
# defined earlier in the file, outside this view — confirm before reuse.
bs = Orange.ensemble.boosting.BoostedLearner(tree, name="boosted tree")
bg = Orange.ensemble.bagging.BaggedLearner(tree, name="bagged tree")
svm = Orange.classification.svm.SVMLearnerEasy(name='svm')
# You can test different learning methods alone or all at once
# by adjusting the simple array of methods here. Orange contains
# many more methods. In particular, SVM has several variations,
# such as different basis functions. You can set up many of these
# and include them here or not.
learners = [knn, svm, bayes, tree, forest, bs, bg]
# Carry out the cross validation calculations with all of the different learning methods
results = Orange.evaluation.testing.cross_validation(learners, data,
                                                     folds=kFolds)
# Compute statistics on the results and print out
cm = orngStat.computeConfusionMatrices(
    results, class_index=data.domain.classVar.values.index(target))
ma = orngFSS.attMeasure(data)
t0 = orngStat.CA(results)
# Per-iteration split, presumably for ROC computation further down.
roc = orngStat.splitByIterations(results)
#print "shape of roc = ", np.shape(roc)
# (label, expression) pairs; expressions are evaluated elsewhere with
# `results`/`cm` in scope.
stat = (('CA', 'CA(results)'),
        ('Sens', 'sens(cm)'),
        ('Spec', 'spec(cm)'),
        ('AUC', 'AUC(results)'),
        ('IS', 'IS(results)'),
        ('Brier', 'BrierScore(results)'))
#----------------------------------------------------------------------------------------
# Perform a permutation analysis to compute an empirical p-value. The
# Evaluate an SVM on every combination of the three feature groups.
# NOTE(review): cca, semantic, misc and data come from earlier in the file,
# outside this view; each group is presumably a collection of feature
# names — confirm against their definitions.
tests = [(cca, "cca"), (semantic, "semantic"), (misc, "misc"),
         (cca + semantic, "cca + semantic"), (cca + misc, "cca + misc"),
         (semantic + misc, "semantic + misc"),
         (cca + semantic + misc, "cca + semantic + misc")]
for test in tests:
    # Restrict the domain to the features named in this test's group.
    attributes = [
        attr for attr in data.domain.features if attr.name in test[0]
    ]
    new_domain = Orange.data.Domain(attributes, data.domain.class_var)
    new_data = Orange.data.Table(new_domain, data)
    # "yes" is treated as the positive class for precision/recall/F1.
    classIndex = list(new_data.domain.classVar.values).index("yes")
    svm = Orange.classification.svm.SVMLearner()
    res = Orange.evaluation.testing.cross_validation([svm], new_data,
                                                     folds=10)
    cm = orngStat.computeConfusionMatrices(res, classIndex=classIndex)
    print "======"
    print test[1]
    print "attributes: ",
    print ",".join([attr.name for attr in new_data.domain.features])
    print "Accuracy: %.1f%%" % (100 * orngStat.CA(res)[0])
    print "Precision: %.3f" % orngStat.precision(cm)[0]
    print "Recall: %.3f" % orngStat.recall(cm)[0]
    print "F1: %.3f" % orngStat.F1(cm)[0]
    # Flatten the results so downstream code can treat the CV run as a
    # single iteration.
    res.all_results = res.results
    res.number_of_iterations = 1
# Accumulator for a following section (continues past this view).
CAs = []