def evaluating(self, event): train_data = orange.ExampleTable("classification.tab") bayes2 = orange.BayesLearner() tree2 = orngTree.TreeLearner() knnLearner2 = orange.kNNLearner() knnLearner2.k = 10 # k == 18 seems to be best (at least for 2-3) #svm2 = svm.SVMLearner() bayes2.name = "bayes2" tree2.name = "tree2" knnLearner2.name = "knn2" learners = [bayes2, tree2, knnLearner2] results = orngTest.crossValidation(learners, train_data, folds=10) print "train_data:" print train_data print "k==" print knnLearner2.k print 'Learner CA IS Brier AUC' c = '' for i in range(len(learners)): print "%-8s %5.3f %5.3f %5.3f %5.3f" % (learners[i].name, \ orngStat.CA(results)[i], orngStat.IS(results)[i], orngStat.BrierScore(results)[i], orngStat.AUC(results)[i]) c = c+str("%-8s"%learners[i].name)+'\t'+ str("%5.3f" %orngStat.CA(results)[i])+'\t'+ str("%5.3f" %orngStat.IS(results)[i])+'\t'+str("%5.3f" %orngStat.BrierScore(results)[i])+'\t'+str("%5.3f" %orngStat.AUC(results)[i]+'\n') self.logger2.AppendText('Learner CA IS Brier AUC:\n %s\n' % c) return (c)
def classify(datafile): data = orange.ExampleTable(datafile) # setting up the classifiers majority = orange.MajorityLearner(data) bayes = orange.BayesLearner(data) tree = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=2) knn = orange.kNNLearner(data, k=1000) majority.name="Majority"; bayes.name="Naive Bayes"; tree.name="Tree"; knn.name="kNN" classifiers = [majority, bayes, tree, knn] # print the head print "Possible classes:", data.domain.classVar.values print "Probability for republican:" print "Original Class", for l in classifiers: print "%-13s" % (l.name), print # classify first 10 instances and print probabilities for example in data[:100]: print "(%-10s) " % (example.getclass()), for c in classifiers: p = apply(c, [example, orange.GetProbabilities]) print "%5.3f " % (p[0]), print
def getKnnClassifier(self, data, domainList): dataList = [] i = 0 for domain in domainList: partData = [(data[x][:self.FeatureNum] + [data[x][self.FeatureNum+i]]) for x in range(len(data))] table = orange.ExampleTable(domain, partData) dataList.append(table) i = i + 1 #tree = orange.TreeLearner(table) #treeList.append(tree) treeList = [] for data in dataList: #tree = orngTree.TreeLearner() #tunedTree = orngWrap.Tune1Parameter(object=tree, parameter='m_pruning', \ #values=[0, 0.1, 0.2, 1, 2, 5, 10], verbose=2, \ #values=[0], verbose=2, \ #returnWhat=orngWrap.TuneParameters.returnClassifier) #classifier = tunedTree(data) print "begin training...." knn = orange.kNNLearner(data, k=1) print "finish training" treeList.append(knn) return treeList
def orange_classification(self, event):
    """Train bayes/tree/kNN on classification.tab, classify every
    example in testing.tab, append the predictions to self.logger1
    and return them as one string."""
    # classification
    train_data = orange.ExampleTable("classification.tab")
    test_data = orange.ExampleTable("testing.tab")
    bayes = orange.BayesLearner(train_data)
    tree = orngTree.TreeLearner(train_data)
    knnLearner = orange.kNNLearner(train_data)
    knnLearner.k = 10  # k == 18 seems to be best (at least for 2-3)
    bayes.name = "bayes:"
    tree.name = "tree:"
    knnLearner.name = "knn:"
    classifiers = [bayes, tree, knnLearner]
    # one output row per test example: "<name><pred>\t" per classifier,
    # then the example index
    rows = []
    for i, example in enumerate(test_data):
        c0 = classifiers[0](example)
        c1 = classifiers[1](example)
        c2 = classifiers[2](example)
        rows.append(bayes.name + str(c0) + '\t' +
                    tree.name + str(c1) + '\t' +
                    knnLearner.name + str(c2) + '\t' + str(i) + '\n')
    # join instead of repeated += (quadratic in the original)
    c = ''.join(rows)
    self.logger1.AppendText('Location Predicted: %s\n' % c)
    return c
def learn(self, xvals, yvals, couplings):
    """Fit one k-NN regressor per output column of `yvals`.

    For output i, self.couplingFun(couplings, i) selects the relevant
    input columns of `xvals`; a k-NN model (k = self.k) is trained on
    that sub-problem and registered via self.addRegressor.
    """
    # enumerate instead of range(len(...)) + manual indexing
    for i, ysub in enumerate(yvals):
        idxs = self.couplingFun(couplings, i)
        xsub = xvals[idxs]
        domain = self.makeDomain(xsub)
        data = self.trainingData(xsub, ysub, domain)
        learned_model = orange.kNNLearner(data, k=self.k)
        self.addRegressor(learned_model, idxs, domain)
def run_knn(x, y, train_idxs, test_idxs):
    """Train a 5-NN classifier on the training split of (x, y) and
    return its predictions on the test split."""
    examples = get_ex(x, y)
    train_set = examples.getitems(train_idxs)
    test_set = examples.getitems(test_idxs)
    classifier = orange.kNNLearner(train_set, k=5)
    return getpred(classifier, test_set)
def knn_classifier(tree, outfile="dev.tab", **kwargs):
    """Train a 1-NN classifier on the .tab table derived from `outfile`
    and return [(example_index, P("YES")), ...] for every example.

    `tree` is unused here; it is kept for interface compatibility with
    the sibling classifier functions.
    """
    import orange, orngTest
    classes = []
    # TODO: skip writing to file
    outfile = outfile.rsplit(".", 1)[0]
    data = orange.ExampleTable(outfile)
    knn = orange.kNNLearner(data, k=1, name="knn")
    for i, example in enumerate(data, 1):
        # direct call instead of the deprecated apply() builtin
        p = knn(example, orange.GetProbabilities)["YES"]
        classes.append((i, p))
    return classes
def knn(self):
    """Run one random train/test split and append the kNN classification
    accuracy to self.acc.

    Fixes relative to the original:
    - `data.select(...)` referenced an undefined global; the table is
      self.data (the same one MakeRandomIndices2 was built from).
    - `(self.folds-1)/self.folds` and `occ/len(test)` were Python 2
      integer divisions, so p0 was 0 and the accuracy was always 0;
      both are now computed in float.
    """
    random.seed()
    seed = int(random.random() * 1000)
    rndind = orange.MakeRandomIndices2(
        self.data, p0=float(self.folds - 1) / self.folds, randseed=seed)
    train = self.data.select(rndind, 0)
    test = self.data.select(rndind, 1)
    knn = orange.kNNLearner(train, k=self.k, rankWeight=True)
    occ = 0
    for inp in test:
        if knn(inp) == inp.getclass():
            occ += 1
    ca = float(occ) / len(test)
    self.acc.append(ca)
def tweaked(outfile, **kwargs):
    """Cross-validate a learner on the .tab table derived from `outfile`
    and return [(result_index, P(second class)), ...]."""
    import orange, orngTest
    classes = []
    outfile = outfile.rsplit(".",1)[0]
    data = orange.ExampleTable(outfile)
    #c45 = orange.C45Learner(minObjs=100)
    # NOTE(review): minObjs is a C45Learner parameter (see the commented
    # line above); kNNLearner does not define it -- confirm this call
    # actually works rather than raising.
    c45 = orange.kNNLearner(minObjs=100)
    results = orngTest.crossValidation([c45], data, folds=10)
    for i, example in enumerate(results.results, 1):
        # probabilities[0]: class distribution predicted by the single
        # learner; p[1] is the probability of the second class value
        p = example.probabilities[0]
        classes.append((i, p[1]))
    return classes
def test_anonymous_from_numpy(self):
    """Build a kNN learner directly from a raw numpy array and check the
    auto-constructed ("anonymous") domain and example conversion."""
    a = numpy.zeros((4, 5))
    a[0, 0] = 0.5
    a[3, 1] = 11
    a[3, 2] = -1
    a[1, 3] = 5
    # the learner accepts a bare numpy array; a domain is synthesized
    knn = orange.kNNLearner(a)
    data = knn.find_nearest.examples
    domain = data.domain
    # auto-generated variable names are var00000 .. var00004
    self.assertEqual([var.name for var in domain.variables],
                     ["var%05i" % i for i in range(5)])
    # columns 0-2 become continuous, columns 3-4 discrete -- presumably
    # based on the values each column holds (TODO confirm the heuristic)
    self.assertEqual(domain[0].var_type, orange.VarTypes.Continuous)
    self.assertEqual(domain[1].var_type, orange.VarTypes.Continuous)
    self.assertEqual(domain[2].var_type, orange.VarTypes.Continuous)
    self.assertEqual(domain[3].var_type, orange.VarTypes.Discrete)
    # column 3 holds values 0 and 5, expanded to v0..v5
    self.assertEqual(domain[3].values, ["v%i" % i for i in range(6)])
    self.assertEqual(domain[4].var_type, orange.VarTypes.Discrete)
    self.assertEqual(domain[4].values, ["v0"])
    # a hand-built domain with the same structure but real names/values
    dom2 = orange.Domain([
        orange.ContinuousVariable("a"),
        orange.ContinuousVariable("b"),
        orange.ContinuousVariable("c"),
        orange.DiscreteVariable("d", values="abcdef"),
        orange.DiscreteVariable("e", values="ab")
    ])
    ex = orange.Example(dom2, [3.14, 42, 2.7, "d", "a"])
    # classification through the anonymous domain must accept it
    knn(ex)
    # conversion anonymous <-> named must round-trip value-for-value
    ex1 = domain(ex)
    self.assertEqual(list(map(float, ex1)), list(map(float, ex)))
    ex2 = dom2(ex1)
    self.assertEqual(list(map(float, ex1)), list(map(float, ex2)))
    self.assertEqual(ex1, ex2)
    self.assertEqual(ex1, ex)
    # a structurally different named domain must NOT convert
    iris = orange.ExampleTable("iris")
    with self.assertRaises(ValueError):
        domain(iris[0])
    # reassigning a new array builds a fresh anonymous domain
    knn.find_nearest.examples = numpy.zeros((3, 3))
    domain = knn.find_nearest.examples.domain
    for i in range(3):
        var = domain[i]
        self.assertEqual(var.name, "var%05i" % i)
        # all-zero columns become discrete with the single value v0
        self.assertEqual(var.values, ["v0"])
def __init__(self,name, train_examples, measure = "retis", test_ex = None):
    """Wrap one of three Orange learners, chosen by `name`.

    name: one of 'knn', 'tree', 'forest'.
    train_examples: Orange ExampleTable to train on.
    measure: split measure. NOTE(review): only the 'forest' branch uses
        this parameter -- the 'tree' branch hard-codes 'retis'. Confirm
        whether that asymmetry is intentional.
    test_ex: optional test examples; predictions are printed for the
        'tree' branch only.

    NOTE(review): if `name` is none of the three known values, `c` is
    never bound and the final assignment raises NameError.
    """
    self.training = train_examples
    self.name = name
    if name == 'knn':
        k = 5
        c = orange.kNNLearner(train_examples, k = k)
    elif name == 'tree':
        c = orange.TreeLearner(train_examples,measure = 'retis' , mForPruning=4, minExamples=2)
        if test_ex:
            print getpred(c, test_ex)
    elif name == 'forest':
        # the forest's base tree honours the `measure` argument
        tree = orange.TreeLearner(measure=measure, mForPruning=2, minExamples=4)
        c = orngEnsemble.RandomForestLearner(train_examples, trees=50, learner = tree)
    self.classifier = c
def xtestChinaEuropeSimpler():
    """Compare majority-class and kNN error rates on the ISMIR 2011
    folk-song train/test split."""
    import orange, orngTree  # @UnusedImport @UnresolvedImport
    trainData = orange.ExampleTable('ismir2011_fb_folkTrain.tab')
    testData = orange.ExampleTable('ismir2011_fb_folkTest.tab')
    majClassifier = orange.MajorityLearner(trainData)
    knnClassifier = orange.kNNLearner(trainData)
    majWrong = 0
    knnWrong = 0
    for row in testData:
        actual = row.getclass()
        if majClassifier(row) != actual:
            majWrong += 1
        if knnClassifier(row) != actual:
            knnWrong += 1
    total = float(len(testData))
    print (majWrong/total, knnWrong/total)
def xtestChinaEuropeSimpler(): import orange, orngTree trainData = orange.ExampleTable("ismir2011_fb_folkTrain.tab") testData = orange.ExampleTable("ismir2011_fb_folkTest.tab") majClassifier = orange.MajorityLearner(trainData) knnClassifier = orange.kNNLearner(trainData) majWrong = 0 knnWrong = 0 for testRow in testData: majGuess = majClassifier(testRow) knnGuess = knnClassifier(testRow) realAnswer = testRow.getclass() if majGuess != realAnswer: majWrong += 1 if knnGuess != realAnswer: knnWrong += 1 total = float(len(testData)) print majWrong / total, knnWrong / total
def knn_classifier_xv(tree, outfile="dev.tab", **kwargs): import orange, orngTest, orngStat classes = [] # TODO: skipp skriving til fil outfile = outfile.rsplit(".",1)[0] data = orange.ExampleTable(outfile) knn = orange.kNNLearner(k=21, name="knn") results = orngTest.crossValidation([knn], data, folds=10) # output the results print "Learner CA IS Brier AUC" print "%-8s %5.3f %5.3f %5.3f %5.3f" % (knn.name, \ orngStat.CA(results)[0], orngStat.IS(results)[0], orngStat.BrierScore(results)[0], orngStat.AUC(results)[0]) print results.results[0].probabilities for i, example in enumerate(results.results, 1): p = example.probabilities[0] classes.append((i, p[1])) return classes
def testTrecentoSimpler(): import orange, orngTree trainData = orange.ExampleTable("d:/desktop/trecento2.tab") testData = orange.ExampleTable("d:/desktop/trecento1.tab") majClassifier = orange.MajorityLearner(trainData) knnClassifier = orange.kNNLearner(trainData) majWrong = 0 knnWrong = 0 for testRow in testData: majGuess = majClassifier(testRow) knnGuess = knnClassifier(testRow) realAnswer = testRow.getclass() if majGuess != realAnswer: majWrong += 1 if knnGuess != realAnswer: knnWrong += 1 total = float(len(testData)) print majWrong / total, knnWrong / total
def testTrecentoSimpler():
    """Compare majority-class and kNN error rates on the trecento
    train/test split (hard-coded desktop paths)."""
    import orange, orngTree  # @UnusedImport @UnresolvedImport
    trainData = orange.ExampleTable('d:/desktop/trecento2.tab')
    testData = orange.ExampleTable('d:/desktop/trecento1.tab')
    majClassifier = orange.MajorityLearner(trainData)
    knnClassifier = orange.kNNLearner(trainData)
    majWrong = 0
    knnWrong = 0
    for row in testData:
        actual = row.getclass()
        if majClassifier(row) != actual:
            majWrong += 1
        if knnClassifier(row) != actual:
            knnWrong += 1
    total = float(len(testData))
    print (majWrong/total, knnWrong/total)
ow = OWPredictions() ow.show() import orngTree dataset = orange.ExampleTable('../../doc/datasets/iris.tab') # dataset = orange.ExampleTable('../../doc/datasets/auto-mpg.tab') ind = orange.MakeRandomIndices2(p0=0.5)(dataset) data = dataset.select(ind, 0) test = dataset.select(ind, 1) testnoclass = orange.ExampleTable(orange.Domain(test.domain.attributes, False), test) tree = orngTree.TreeLearner(data) tree.name = "tree" maj = orange.MajorityLearner(data) maj.name = "maj" knn = orange.kNNLearner(data, k = 10) knn.name = "knn" # ow.setData(test) # # ow.setPredictor(maj, 1) if 1: # data set only ow.setData(test) if 0: # two predictors, test data with class ow.setPredictor(maj, 1) ow.setPredictor(tree, 2) ow.setData(test) if 0: # two predictors, test data with no class
import orange, orngTree, orngTest, orngStat data = orange.ExampleTable("housing.tab") selection = orange.MakeRandomIndices2(data, 0.5) train_data = data.select(selection, 0) test_data = data.select(selection, 1) maj = orange.MajorityLearner(train_data) maj.name = "default" rt = orngTree.TreeLearner(train_data, measure="retis", mForPruning=2, minExamples=20) rt.name = "reg. tree" k = 5 knn = orange.kNNLearner(train_data, k=k) knn.name = "k-NN (k=%i)" % k regressors = [maj, rt, knn] print "\n%10s " % "original", for r in regressors: print "%10s " % r.name, print for i in range(10): print "%10.1f " % test_data[i].getclass(), for r in regressors: print "%10.1f " % r(test_data[i]), print
def run(x,y):
    """Build Orange example tables from x/y, cross-validate one of three
    learner families (selected by the hard-coded boolean flags below),
    and return the last learner's per-example predictions as an array.

    NOTE(review): this function contains debugging leftovers -- see the
    inline notes on the undefined name and the unreachable tail.
    """
    datab = get_ex(x,y,xb = 0, yb = 1)
    datac = get_ex(x,y,xb = 0, yb = 0)
    predictions = []
    # hard-coded mode switches: only the `ensemble` branch runs as-is
    classify = False
    regress = False
    ensemble = True
    if classify:
        data = datab
        lr = orange.LinearLearner(datab)
        tc = orange.TreeLearner(datab)
        lgr= orange.SVMLearner(datab)
        learners = [lr,tc,lgr]
    elif regress:
        data = datac
        # NOTE(review): `train_data` is not defined anywhere in this
        # function -- this branch would raise NameError if regress were
        # ever set to True. Presumably it should use `datac`; confirm.
        maj = orange.MajorityLearner(train_data)
        maj.name = "default"
        rt = orngTree.TreeLearner(train_data, measure="retis",
                                  mForPruning=2, minExamples=20)
        rt.name = "reg. tree"
        k = 5
        knn = orange.kNNLearner(train_data, k=k)
        knn.name = "k-NN (k=%i)" % k
        learners = [maj,rt, knn]
    elif ensemble:
        data = datab
        tree = orngTree.TreeLearner(mForPruning=2, name="tree")
        bs = orngEnsemble.BoostedLearner(tree, name="boosted tree")
        bg = orngEnsemble.BaggedLearner(tree, name="bagged tree")
        forest = orngEnsemble.RandomForestLearner(trees=50, name="forest")
        #data = orange.ExampleTable("lymphography.tab")
        learners = [tree, bs, bg,forest]
    results = orngTest.crossValidation(learners, data)
    print "Classification Accuracy:"
    for i in range(len(learners)):
        print ("%15s: %5.3f") % (learners[i].name, orngStat.CA(results)[i])
    # NOTE(review): everything below this bare raise is unreachable
    # dead code (kept verbatim; candidate for deletion).
    raise Exception()
    # NOTE(review): 'mForPrunning' below looks like a typo for
    # 'mForPruning' -- moot while this code is unreachable.
    tree = orngTree.TreeLearner( minExamples=2, mForPrunning=2, \
        sameMajorityPruning=True, name='tree')
    #qbs = orngEnsemble.BoostedLearner(tree,data, name="boosted tree")
    #bg = orngEnsemble.BaggedLearner(tree,data, name="bagged tree")
    forest = orngEnsemble.RandomForestLearner(data,trees=50, name="forest")
    #learners = [tree, bs, bg]
    #results = orngTest.crossValidation(learners, data)
    #print "Classification Accuracy:"
    #for i in range(len(learners)):
    #    print ("%15s: %5.3f") % (learners[i].name, orngStat.CA(results)[i])
    learners = [tree, forest]
    results = orngTest.crossValidation(learners, data)
    print "Learner CA Brier AUC"
    for i in range(len(learners)):
        print "%-8s %5.3f %5.3f %5.3f" % (learners[i].name, \
            orngStat.CA(results)[i], orngStat.BrierScore(results)[i],
            orngStat.AUC(results)[i])
    # collect per-example predictions of the last learner only
    for l in learners:
        for d in data:
            p = l(d)
            if l == learners[-1]:
                predictions.append(p)
    predictions = array([predictions],float)
    print y
    print predictions
    return predictions
data = orange.ExampleTable("features.tab") folds = 10 k = 150 cv = CrossVal(data, folds, k) cv.run_kNN() cv.printCA() # Builtin ClossValidator, with several classifiers, only used for testing early on if False: # set up the learners bayes = orange.BayesLearner() tree = orngTree.TreeLearner(mForPruning=2) knn = orange.kNNLearner(k=k) bayes.name = "bayes" tree.name = "tree" knn.name = "knn" learners = [bayes, tree, knn] # compute accuracies on data data = orange.ExampleTable("features.tab") # Create a crossvalidation on the sampleset so that you don't classify it's own data results = orngTest.crossValidation(learners, data, folds=10) # output the results print "Learner \tAccuracy" for i in range(len(learners)): print "%-8s\t%5.3f%%" % (learners[i].name, orngStat.CA(results)[i]*100)
#!/bin/bash /Applications/Orange.app/Contents/MacOS/python import orange, orngTest, orngStat bayes = orange.BayesLearner(name='naive bayes') knn = orange.kNNLearner(name='knn') learners = [bayes, knn] data = orange.ExampleTable("testdata") results = orngTest.crossValidation(learners, data, folds=10) cdt = orngStat.computeCDT(results) print "Learner CA IS BRIER AUC" for i in range(len(learners)): print "%-8s %5.3f %5.3f %5.3f %5.3f" % ( learners[i].name, orngStat.CA(results)[i], orngStat.IS(results)[i], orngStat.BrierScore(results)[i], orngStat.AROCFromCDT(cdt[i])[7])
def knn(input_dict):
    """Widget entry point: return a dict with an untrained Orange kNN
    learner under the key 'knnout'. `input_dict` is unused."""
    import orange
    learner = orange.kNNLearner(name="kNN (Orange)")
    return {'knnout': learner}
def learner(self, data, k):
    """Return an Orange kNN model trained on `data` with the given k."""
    model = orange.kNNLearner(data, k)
    return model
#######for i in range(len(classifiers)): #######print classifiers[i].name, acc[i] ######## #######acc2 = accuracy2(test_data, classifiersConfusion, 11) #######print "confusion matrix for the "+classifiersConfusion[0].name+" classifier:" #######for i in range(len(acc2)): #######print acc2[i] ################################################### ### Now: Confusion Matrix!!! ################################################### bayes2 = orange.BayesLearner() tree2 = orngTree.TreeLearner() knnLearner2 = orange.kNNLearner() knnLearner2.k = 10 # k == 18 seems to be best (at least for 2-3) #svm2 = svm.SVMLearner() bayes2.name = "bayes2" tree2.name = "tree2" knnLearner2.name = "knn2" #svm2.name = "svm2" learners = [bayes2, tree2, knnLearner2] results = orngTest.crossValidation(learners, train_data, folds=10) # output the results print "train_data:" print filename_train print "k=="
# Description: Shows how to use the nearest-neighbour learning # Category: learning # Classes: kNNLearner, kNNClassifier, ExamplesDistance, ExamplesDistanceConstructor # Uses: iris # Referenced: kNNLearner.htm import orange, orngTest, orngStat data = orange.ExampleTable("iris") rndind = orange.MakeRandomIndices2(data, p0=0.8) train = data.select(rndind, 0) test = data.select(rndind, 1) knn = orange.kNNLearner(train, k=10) for i in range(5): example = test.randomexample() print example.getclass(), knn(example) print "\n\n" data = orange.ExampleTable("iris") knn = orange.kNNLearner() knn.k = 10 knn.distanceConstructor = orange.ExamplesDistanceConstructor_Hamming() knn = knn(train) for i in range(5): example = test.randomexample() print example.getclass(), knn(example)
# Uses: housing.tab # Referenced: orngStat.htm import orange import orngRegression as r import orngTree import orngStat, orngTest data = orange.ExampleTable("housing") # definition of regressors lr = r.LinearRegressionLearner(name="lr") rt = orngTree.TreeLearner(measure="retis", mForPruning=2, minExamples=20, name="rt") maj = orange.MajorityLearner(name="maj") knn = orange.kNNLearner(k=10, name="knn") learners = [maj, rt, knn, lr] # cross validation, selection of scores, report of results results = orngTest.crossValidation(learners, data, folds=3) scores = [("MSE", orngStat.MSE), ("RMSE", orngStat.RMSE), ("MAE", orngStat.MAE), ("RSE", orngStat.RSE), ("RRSE", orngStat.RRSE), ("RAE", orngStat.RAE), ("R2", orngStat.R2)] print "Learner " + "".join(["%-8s" % s[0] for s in scores]) for i in range(len(learners)): print "%-8s " % learners[i].name + \ "".join(["%7.3f " % s[1](results)[i] for s in scores])
# Description: Uses cross-validation to compare regression tree and k-nearest neighbors # Category: modelling, evaluation # Uses: housing # Classes: orngStat.MSE, orngTest.crossValidation, MajorityLearner, orngTree.TreeLearner, orange.kNNLearner # Referenced: regression.htm import orange, orngTree, orngTest, orngStat data = orange.ExampleTable("../datasets/housing.tab") maj = orange.MajorityLearner() maj.name = "default" rt = orngTree.TreeLearner(measure="retis", mForPruning=2, minExamples=20) rt.name = "regression tree" k = 5 knn = orange.kNNLearner(k=k) knn.name = "k-NN (k=%i)" % k learners = [maj, rt, knn] data = orange.ExampleTable("../datasets/housing.tab") results = orngTest.crossValidation(learners, data, folds=10) mse = orngStat.MSE(results) print "Learner MSE" for i in range(len(learners)): print "%-15s %5.3f" % (learners[i].name, mse[i])
def knn(input_dict):
    """Widget entry point: build the output dict holding an untrained
    Orange kNN learner under 'knnout'. `input_dict` is unused."""
    import orange
    output_dict = {}
    output_dict['knnout'] = orange.kNNLearner(name = "kNN (Orange)")
    return output_dict
# Classes: orngTest.crossValidation, orngTree.TreeLearner, orange.kNNLearner, orngRegression.LinearRegressionLearner # Referenced: regression.htm import orange import orngRegression import orngTree import orngStat, orngTest data = orange.ExampleTable("housing") # definition of learners (regressors) lr = orngRegression.LinearRegressionLearner(name="lr") rt = orngTree.TreeLearner(measure="retis", mForPruning=2, minExamples=20, name="rt") maj = orange.MajorityLearner(name="maj") knn = orange.kNNLearner(k=10, name="knn") learners = [maj, lr, rt, knn] # evaluation and reporting of scores results = orngTest.crossValidation(learners, data, folds=10) scores = [("MSE", orngStat.MSE), ("RMSE", orngStat.RMSE), ("MAE", orngStat.MAE), ("RSE", orngStat.RSE), ("RRSE", orngStat.RRSE), ("RAE", orngStat.RAE), ("R2", orngStat.R2)] print "Learner " + "".join(["%-7s" % s[0] for s in scores]) for i in range(len(learners)): print "%-8s " % learners[i].name + "".join( ["%6.3f " % s[1](results)[i] for s in scores])
def __init__(self, data):
    # Train a k-NN learner immediately on construction.
    # NOTE(review): `kNNLearner` must already be in this module's
    # namespace (e.g. `from orange import kNNLearner`), and `data` is
    # assumed to expose get_data() returning an Orange example table --
    # confirm against the rest of the file.
    self.learner = kNNLearner (data.get_data())