Example #1
    def recomputeCM(self):
        """Recompute the confusion-matrix based scores for all learners."""
        if not self.results:
            return
        # cm is referenced by the eval'd score expressions in s.f (e.g. 'sens(cm)')
        cm = orngStat.computeConfusionMatrices(self.results,
                                               classIndex=self.targetClass)
        scores = [(indx, eval("orngStat." + s.f))
                  for (indx, s) in enumerate(self.stat) if s.cmBased]
        for (indx, score) in scores:
            for (i, l) in enumerate(
                    [l for l in self.learners.values() if l.scores]):
                l.scores[indx] = score[i]
        self.paintscores()
Example #2
def generalCVconfMat(data, learners, nFolds = 5):
    """
    General method for printing the X fold CV confusion matrix of an Orange data set (data)
    with any number of classes. learners is a list of AZorange learners.
    """

    res = orngTest.crossValidation(learners, data,
                                   strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                   folds=nFolds)
    classes = data.domain.classVar.values

    for idx in range(len(learners)):
        cm = orngStat.computeConfusionMatrices(res)[idx]
        print "Results for "+learners[idx].name
        print "\t"+"\t".join(classes)
        for className, classConfusions in zip(classes, cm):
            print ("%s" + ("\t%i" * len(classes))) % ((className, ) + tuple(classConfusions))
Example #3
    def cross_validation(self):

        data = self.data
        # set up the learners
        bayes = orange.BayesLearner()
        tree = orngTree.TreeLearner(mForPruning=2)
        bayes.name = "bayes"
        tree.name = "tree"

        # a nu-SVC with probability estimates
        svm = orange.SVMLearner()
        svm.name = "SVM"
        svm.svm_type = orange.SVMLearner.Nu_SVC
        svm.nu = 0.3
        svm.probability = True

        learners = [bayes, tree, svm]

        # compute accuracies and confusion-matrix based scores on data
        res = orngTest.crossValidation(learners, data, folds=10)
        cm = orngStat.computeConfusionMatrices(res,
                classIndex=data.domain.classVar.values.index('-1'))

        stat = (('CA', 'CA(res)'),
                ('Sens', 'sens(cm)'),
                ('Spec', 'spec(cm)'),
                ('AUC', 'AUC(res)'),
                ('IS', 'IS(res)'),
                ('Brier', 'BrierScore(res)'),
                ('F1', 'F1(cm)'),
                ('F2', 'Falpha(cm, alpha=2.0)'),
                ('MCC', 'MCC(cm)'),
                ('sPi', 'scottsPi(cm)'),
                )

        scores = [eval("orngStat." + s[1]) for s in stat]
        print "Learner  " + "".join(["%-7s" % s[0] for s in stat])
        for (i, l) in enumerate(learners):
            print "%-8s " % l.name + "".join(["%5.3f  " % s[i] for s in scores])

        return None
Example #4
def generalCVconfMat(data, learners, nFolds=5):
    """
    General method for printing the X fold CV confusion matrix of an Orange data set (data)
    with any number of classes. learners is a list of AZorange learners.
    """

    res = orngTest.crossValidation(
        learners,
        data,
        strat=orange.MakeRandomIndices.StratifiedIfPossible,
        folds=nFolds)
    classes = data.domain.classVar.values

    for idx in range(len(learners)):
        cm = orngStat.computeConfusionMatrices(res)[idx]
        print "Results for " + learners[idx].name
        print "\t" + "\t".join(classes)
        for className, classConfusions in zip(classes, cm):
            print("%s" + ("\t%i" * len(classes))) % (
                (className, ) + tuple(classConfusions))
Example #5

# Referenced:  c_performance.htm

import orange
import orngTest, orngStat, orngTree

# set up the learners
bayes = orange.BayesLearner()
tree = orngTree.TreeLearner(mForPruning=2)
bayes.name = "bayes"
tree.name = "tree"
learners = [bayes, tree]

# compute accuracies on data
data = orange.ExampleTable("voting")
res = orngTest.crossValidation(learners, data, folds=10)
cm = orngStat.computeConfusionMatrices(
    res, classIndex=data.domain.classVar.values.index("democrat"))

stat = (
    ("CA", lambda res, cm: orngStat.CA(res)),
    ("Sens", lambda res, cm: orngStat.sens(cm)),
    ("Spec", lambda res, cm: orngStat.spec(cm)),
    ("AUC", lambda res, cm: orngStat.AUC(res)),
    ("IS", lambda res, cm: orngStat.IS(res)),
    ("Brier", lambda res, cm: orngStat.BrierScore(res)),
    ("F1", lambda res, cm: orngStat.F1(cm)),
    ("F2", lambda res, cm: orngStat.Falpha(cm, alpha=2.0)),
    ("MCC", lambda res, cm: orngStat.MCC(cm)),
    ("sPi", lambda res, cm: orngStat.scottsPi(cm)),
)

scores = [s[1](res, cm) for s in stat]
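
The snippet stops after computing the scores. A natural continuation, borrowed
from the eval-based variant in Example #6 below, would print one row per learner:

# same tabular print loop as in Example #6
print "Learner  " + "".join(["%-7s" % s[0] for s in stat])
for (i, l) in enumerate(learners):
    print "%-8s " % l.name + "".join(["%5.3f  " % s[i] for s in scores])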
Example #6
# Category:    evaluation
# Uses:        voting.tab
# Classes:     orngTest.crossValidation
# Referenced:  c_performance.htm

import orange
import orngTest, orngStat, orngTree

# set up the learners
bayes = orange.BayesLearner()
tree = orngTree.TreeLearner(mForPruning=2)
bayes.name = "bayes"
tree.name = "tree"
learners = [bayes, tree]

# compute accuracies on data
data = orange.ExampleTable("voting")
res = orngTest.crossValidation(learners, data, folds=10)
cm = orngStat.computeConfusionMatrices(
    res, classIndex=data.domain.classVar.values.index('democrat'))

stat = (('CA', 'CA(res)'), ('Sens', 'sens(cm)'), ('Spec', 'spec(cm)'),
        ('AUC', 'AUC(res)'), ('IS', 'IS(res)'), ('Brier', 'BrierScore(res)'),
        ('F1', 'F1(cm)'), ('F2', 'Falpha(cm, alpha=2.0)'), ('MCC', 'MCC(cm)'))

scores = [eval("orngStat." + s[1]) for s in stat]
print
print "Learner  " + "".join(["%-7s" % s[0] for s in stat])
for (i, l) in enumerate(learners):
    print "%-8s " % l.name + "".join(["%5.3f  " % s[i] for s in scores])
Example #7
    def score(self, ids):
        """compute scores for the list of learners"""
        if (not self.data):
            for id in ids:
                self.learners[id].results = None
            return
        # test which learners can accept the given data set
        # e.g., regressions can't deal with classification data
        learners = []
        n = len(self.data.domain.attributes) * 2
        indices = orange.MakeRandomIndices2(
            p0=min(n, len(self.data)),
            stratified=orange.MakeRandomIndices2.StratifiedIfPossible)
        new = self.data.selectref(indices(self.data))
        #        new = self.data.selectref([1]*min(n, len(self.data)) +
        #                                  [0]*(len(self.data) - min(n, len(self.data))))
        self.warning(0)
        for l in [self.learners[id] for id in ids]:
            learner = l.learner
            if self.preprocessor:
                learner = self.preprocessor.wrapLearner(learner)
            try:
                predictor = learner(new)
                if predictor(new[0]).varType == new.domain.classVar.varType:
                    learners.append(learner)
                else:
                    l.scores = []
            except Exception as ex:
                self.warning(
                    0,
                    "Learner %s ends with exception: %s" % (l.name, str(ex)))
                l.scores = []

        if not learners:
            return

        # computation of results (res, and cm if classification)
        pb = None
        if self.resampling == 0:
            pb = OWGUI.ProgressBar(self, iterations=self.nFolds)
            res = orngTest.crossValidation(
                learners,
                self.data,
                folds=self.nFolds,
                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                callback=pb.advance,
                storeExamples=True)
            pb.finish()
        elif self.resampling == 1:
            pb = OWGUI.ProgressBar(self, iterations=len(self.data))
            res = orngTest.leaveOneOut(learners,
                                       self.data,
                                       callback=pb.advance,
                                       storeExamples=True)
            pb.finish()
        elif self.resampling == 2:
            pb = OWGUI.ProgressBar(self, iterations=self.pRepeat)
            res = orngTest.proportionTest(learners,
                                          self.data,
                                          self.pLearning / 100.,
                                          times=self.pRepeat,
                                          callback=pb.advance,
                                          storeExamples=True)
            pb.finish()
        elif self.resampling == 3:
            pb = OWGUI.ProgressBar(self, iterations=len(learners))
            res = orngTest.learnAndTestOnLearnData(learners,
                                                   self.data,
                                                   storeExamples=True,
                                                   callback=pb.advance)
            pb.finish()

        elif self.resampling == 4:
            if not self.testdata:
                for l in self.learners.values():
                    l.scores = []
                return
            pb = OWGUI.ProgressBar(self, iterations=len(learners))
            res = orngTest.learnAndTestOnTestData(learners,
                                                  self.data,
                                                  self.testdata,
                                                  storeExamples=True,
                                                  callback=pb.advance)
            pb.finish()
        if self.isclassification():
            cm = orngStat.computeConfusionMatrices(res,
                                                   classIndex=self.targetClass)

        if self.preprocessor:  # Unwrap learners
            learners = [l.wrappedLearner for l in learners]

        res.learners = learners

        for l in [self.learners[id] for id in ids]:
            if l.learner in learners:
                l.results = res

        self.error(list(range(len(self.stat))))
        scores = []
        for i, s in enumerate(self.stat):
            try:
                scores.append(eval("orngStat." + s.f))

            except Exception as ex:
                self.error(i, "An error occurred while evaluating orngStat." + s.f + "on %s due to %s" % \
                           (" ".join([l.name for l in learners]), ex))
                scores.append([None] * len(self.learners))

        for (i, l) in enumerate(learners):
            self.learners[l.id].scores = [s[i] if s else None for s in scores]

        self.sendResults()
Example #8
    bs = Orange.ensemble.boosting.BoostedLearner(tree, name="boosted tree")
    bg = Orange.ensemble.bagging.BaggedLearner(tree, name="bagged tree")
    svm = Orange.classification.svm.SVMLearnerEasy(name='svm')

    # You can test different learning methods alone or all at once
    # by adjusting the simple array of methods here. Orange contains
    # many more methods. In particular, SVM has several variations,
    # such as different basis functions. You can set up many of these
    # and include them here or not.
    learners = [knn, svm, bayes, tree, forest, bs, bg]

    # Carry out the cross validation calculations with all of the different learning methods
    results = Orange.evaluation.testing.cross_validation(learners, data, folds=kFolds)

    # Compute statistics on the results and print out
    cm = orngStat.computeConfusionMatrices(results, class_index=data.domain.classVar.values.index(target))
    ma = orngFSS.attMeasure(data)
    t0 = orngStat.CA(results)

    roc = orngStat.splitByIterations(results)
    #print "shape of roc = ", np.shape(roc)

    stat = (('CA', 'CA(results)'),
            ('Sens', 'sens(cm)'),
            ('Spec', 'spec(cm)'),
            ('AUC', 'AUC(results)'),
            ('IS', 'IS(results)'),
            ('Brier', 'BrierScore(results)'))
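
    # The "print out" step promised above is missing from this fragment; the
    # eval/print pattern from Example #6 would complete it (hypothetical here):
    scores = [eval("orngStat." + s[1]) for s in stat]
    print "Learner  " + "".join(["%-7s" % s[0] for s in stat])
    for (i, l) in enumerate(learners):
        print "%-8s " % l.name + "".join(["%5.3f  " % s[i] for s in scores])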

    #----------------------------------------------------------------------------------------
    # Perform a permutation analysis to compute an empirical p-value.

tests = [(cca, "cca"), (semantic, "semantic"), (misc, "misc"),
         (cca + semantic, "cca + semantic"), (cca + misc, "cca + misc"),
         (semantic + misc, "semantic + misc"),
         (cca + semantic + misc, "cca + semantic + misc")]

for test in tests:
    attributes = [
        attr for attr in data.domain.features if attr.name in test[0]
    ]
    new_domain = Orange.data.Domain(attributes, data.domain.class_var)
    new_data = Orange.data.Table(new_domain, data)
    classIndex = list(new_data.domain.classVar.values).index("yes")

    svm = Orange.classification.svm.SVMLearner()
    res = Orange.evaluation.testing.cross_validation([svm], new_data, folds=10)
    cm = orngStat.computeConfusionMatrices(res, classIndex=classIndex)

    print "======"
    print test[1]
    print "attributes: ",
    print ",".join([attr.name for attr in new_data.domain.features])

    print "Accuracy:  %.1f%%" % (100 * orngStat.CA(res)[0])
    print "Precision: %.3f" % orngStat.precision(cm)[0]
    print "Recall:    %.3f" % orngStat.recall(cm)[0]
    print "F1:        %.3f" % orngStat.F1(cm)[0]

    res.all_results = res.results
    res.number_of_iterations = 1

    CAs = []