def cforange_confusion_matrix(input_dict):
    import orngStat
    results = input_dict['results']
    classIndex = int(input_dict['classIndex'])
    if input_dict['cutoff'] != '':
        cutoff = float(input_dict['cutoff'])
        cm = orngStat.confusionMatrices(results, classIndex=classIndex, cutoff=cutoff)
    else:
        cm = orngStat.confusionMatrices(results, classIndex=classIndex)
    if len(cm) == 1:
        cm = cm[0]
    output_dict = {}
    output_dict['cm'] = cm
    return output_dict

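# A minimal sketch of how the wrapper above might be driven. The 'voting'
# data set, the Bayes learner, and the 10-fold cross-validation are
# illustrative assumptions, not part of the original wrapper.
def _example_cforange_usage():
    import orange, orngTest
    data = orange.ExampleTable("voting")
    results = orngTest.crossValidation([orange.BayesLearner(name='bayes')], data, folds=10)
    # classIndex arrives as a string; cutoff='' falls back to the default 0.5
    input_dict = {'results': results, 'classIndex': '1', 'cutoff': ''}
    print cforange_confusion_matrix(input_dict)['cm']
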
def ConfMat(res = None):
    """ Returns a confusion matrix in the form of a vector:
        For Binary classifiers:
            [[TP, FN],
             [FP, TN]]
        For classifiers with class having N values:
                           Predicted class
                         |   A    B    C
            ------------------------------
            known    A   |  tpA  eAB  eAC
            class    B   |  eBA  tpB  eBC
                     C   |  eCA  eCB  tpC

            [[tpA, eAB, ..., eAN],
             [eBA, tpB, ..., eBN],
             ...,
             [eNA, eNB, ..., tpN]]

        where A, B, C are the class values in the same order as
        testData.domain.classVar.values
    """
    if res == None:
        return {"type": CLASSIFICATION}
    confMat = orngStat.confusionMatrices(res)[0]
    if len(res.classValues) == 2:
        cm = [[confMat.TP, confMat.FN], [confMat.FP, confMat.TN]]
    else:
        cm = confMat
    return cm

def __call__(self, examples, weightID=0):
    # Train a classifier if one was not supplied, then sweep probability
    # cutoffs and keep the one that maximizes the F-score on `examples`.
    if self.classifier is None:
        classifier = self.learner(examples, weightID=weightID)
    else:
        classifier = self.classifier

    for f in examples.domain.classVar.values:
        print "f", f, f.__class__

    classIndex = examples.domain.classVar.values.index(self.positiveValue)
    results = orngTest.testOnData([classifier], examples)
    thresholds = list(na.arange(0, 1.1, 0.01))
    matrices = [orngStat.confusionMatrices(results, classIndex=classIndex,
                                           cutoff=threshold)[0]
                for threshold in thresholds]
    fscores = map(fScore, matrices)
    i, score = math2d.argMax(fscores)
    threshold = thresholds[i]
    print "fscores", fscores
    print "threshold", threshold
    print "score", score
    # Wrap the classifier actually evaluated above (not self.classifier,
    # which may still be None when the learner path was taken).
    return ThresholdProbabilityClassifier(classifier, threshold,
                                          self.positiveValue, self.negativeValue)

def ConfMat(res=None):
    """ Returns a confusion matrix in the form of a vector:
        For Binary classifiers:
            [[TP, FN],
             [FP, TN]]
        For classifiers with class having N values:
                                 Predicted class
                               |   A    B    C
            ------------------------------------
            experimental  A    |  tpA  eAB  eAC     eAB is read as: Error,
            class         B    |  eBA  tpB  eBC     should be A instead of B
                          C    |  eCA  eCB  tpC

            [[tpA, eAB, ..., eAN],
             [eBA, tpB, ..., eBN],
             ...,
             [eNA, eNB, ..., tpN]]

        where A, B, C are the class values in the same order as
        testData.domain.classVar.values
    """
    if res == None:
        return {"type": CLASSIFICATION}
    confMat_s = orngStat.confusionMatrices(res)
    retCM = []
    for confMat in confMat_s:
        if len(res.classValues) == 2:
            # NOTE: orngStat returns TN, TP, FN, FP with inverted labels,
            # so we have to set them properly:
            cm = [[confMat.TN, confMat.FP], [confMat.FN, confMat.TP]]
        else:
            cm = confMat
        retCM.append(cm)
    return retCM

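# Reading the nested-list form ConfMat returns for a binary problem is plain
# index arithmetic on the 2x2 layout described in the docstring. This helper
# is an illustration added here, not part of the original code; accuracy uses
# only the diagonal, so it is unaffected by the label-inversion note above.
def accuracyFromConfMat(cm):
    (tp, fn), (fp, tn) = cm
    total = float(tp + fn + fp + tn)
    return (tp + tn) / total if total else 0.0
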
def main(): print "loading" annotations = annotation_reader.from_file("%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME) table = annotations.as_orange_table() cv_indices = orange.MakeRandomIndices2(table, p0=0.5) print "indices", set(cv_indices) print "splitting" training, testing = annotation_reader.split(annotations, cv_indices) print "features" engine = PairwiseEngine(training) training_table = engine.training_table testing_table = engine.makeTable(testing) print len(training_table), "training" print len(testing_table), "testing" learners = [orange.MajorityLearner(), orngEnsemble.RandomForestLearner(), ] results = orngTest.learnAndTestOnTestData(learners, training_table, testing_table) for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results)): print orangeUtils.confusion_matrix_to_string(table.domain, cm) print "accuracy: %.2f%%" % (accuracy*100)
def __call__(self, examples, weightID=0):
    maxValue = max([x[self.attributeName] for x in examples])
    minValue = min([x[self.attributeName] for x in examples])
    steps = 10
    bestAccuracy = 0
    bestThreshold = None

    values = ["True", "False"]  # examples.domain.classVar.values
    assert len(values) == 2
    reversedValues = [x for x in reversed(values)]

    classifiers = []
    for threshold in arange(minValue, maxValue, (maxValue - minValue) / steps):
        classifiers.append(
            ThresholdClassifier(self.attributeName, threshold, values,
                                self.classifyFunction))
        classifiers.append(
            ThresholdClassifier(self.attributeName, threshold, reversedValues,
                                self.classifyFunction))

    maxFScore = 0
    bestClassifier = None
    for x in classifiers:
        results = orngTest.testOnData([x], examples)
        fscore = [fScore(cm)
                  for cm in orngStat.confusionMatrices(results, classIndex=0)][0]
        if maxFScore <= fscore:
            maxFScore = fscore
            bestClassifier = x
    return bestClassifier

def setTestResults(self, res):
    self.res = res
    self.warning([0, 1])
    self.outputBox.setEnabled(True)
    if res is not None and res.class_values is None:
        self.warning(1, "Confusion Matrix cannot be used for regression results.")
        self.res = res = None

    if not res:
        self.matrix = None
        self.learnerNames = []
        self.table.setRowCount(0)
        self.table.setColumnCount(0)
        return

    if res and res.test_type != TEST_TYPE_SINGLE:
        self.warning(0, "Confusion matrix can be calculated only for single-target prediction problems.")
        return

    canOutput = True
    if not hasattr(res, "examples"):
        self.warning(1, "Results do not have testing instances (Output is disabled).")
        canOutput = False
    elif not isinstance(res.examples, orange.ExampleTable):
        self.warning(1, "Output for results from 'Proportion test' is not supported.")
        canOutput = False
    self.outputBox.setEnabled(canOutput)

    self.matrix = orngStat.confusionMatrices(res, -2)
    dim = len(res.classValues)

    self.table.setRowCount(dim + 1)
    self.table.setColumnCount(dim + 1)
    self.table.setHorizontalHeaderLabels(res.classValues + [""])
    self.table.setVerticalHeaderLabels(res.classValues + [""])
    for ri in range(dim + 1):
        for ci in range(dim + 1):
            it = QTableWidgetItem()
            it.setFlags(Qt.ItemIsEnabled |
                        (ri < dim and ci < dim and Qt.ItemIsSelectable or Qt.NoItemFlags))
            it.setTextAlignment(Qt.AlignRight)
            self.table.setItem(ri, ci, it)

    boldf = self.table.item(0, dim).font()
    boldf.setBold(True)
    for ri in range(dim + 1):
        self.table.item(ri, dim).setFont(boldf)
        self.table.item(dim, ri).setFont(boldf)

    self.learnerNames = res.classifierNames[:]
    if not self.selectedLearner and self.res.numberOfLearners:
        self.selectedLearner = [0]
    self.learnerChanged()
    self.table.clearSelection()

def thresholdConfusionMatrices(results, classIndex=0, stepSize=0.01):
    thresholds = list(na.arange(0 - stepSize, 1 + stepSize, stepSize))
    # If zero is in here, kill it, because it makes the call fall back to the
    # default behavior.
    thresholds = [t for t in thresholds if t != 0]
    return thresholds, [orngStat.confusionMatrices(results, classIndex,
                                                   cutoff=threshold)[0]
                        for threshold in thresholds]

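# Possible use of thresholdConfusionMatrices: sweep the cutoffs and keep the
# one with the best F1. orngStat.F1 is the same scorer used elsewhere in these
# snippets; treat this as a sketch, not part of the original module.
def pickBestCutoff(results, classIndex=0):
    import orngStat
    thresholds, matrices = thresholdConfusionMatrices(results, classIndex)
    scored = [(orngStat.F1(cm), t) for t, cm in zip(thresholds, matrices)]
    bestF1, bestThreshold = max(scored)
    return bestThreshold, bestF1
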
def displayResults(results):
    for accuracy, cm in zip(orngStat.CA(results),
                            orngStat.confusionMatrices(results, classIndex=0)):
        print "accuracy", accuracy
        print " TP: %i, FP: %i, FN: %i, TN: %i" % (cm.TP, cm.FP, cm.FN, cm.TN)
        print "precision", orngStat.precision(cm)
        print "recall", orngStat.recall(cm)
        print "f1", fScore(cm)
        print

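# fScore is called in several of these snippets but never defined here. A
# minimal stand-in computed from the binary confusion-matrix counts could look
# like the sketch below (a guess at the helper, roughly equivalent to
# orngStat.F1 for the positive class at beta=1).
def fScore(cm, beta=1.0):
    precision = float(cm.TP) / (cm.TP + cm.FP) if (cm.TP + cm.FP) else 0.0
    recall = float(cm.TP) / (cm.TP + cm.FN) if (cm.TP + cm.FN) else 0.0
    if precision + recall == 0:
        return 0.0
    return (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall)
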
def setTestResults(self, res):
    self.res = res
    if not res:
        self.matrix = None
        self.table.setRowCount(0)
        self.table.setColumnCount(0)
        return

    if res and res.test_type != TEST_TYPE_SINGLE:
        self.warning(0, "Confusion matrix can be calculated only for single-target prediction problems.")
        return
    self.warning(0, None)

    self.matrix = orngStat.confusionMatrices(res, -2)
    dim = len(res.classValues)

    self.table.setRowCount(dim + 1)
    self.table.setColumnCount(dim + 1)
    self.table.setHorizontalHeaderLabels(res.classValues + [""])
    self.table.setVerticalHeaderLabels(res.classValues + [""])
    for ri in range(dim + 1):
        for ci in range(dim + 1):
            it = QTableWidgetItem()
            it.setFlags(Qt.ItemIsEnabled |
                        (ri < dim and ci < dim and Qt.ItemIsSelectable or Qt.NoItemFlags))
            it.setTextAlignment(Qt.AlignRight)
            self.table.setItem(ri, ci, it)

    boldf = self.table.item(0, dim).font()
    boldf.setBold(True)
    for ri in range(dim + 1):
        self.table.item(ri, dim).setFont(boldf)
        self.table.item(dim, ri).setFont(boldf)

    self.learnerNames = res.classifierNames[:]
    if not self.selectedLearner and self.res.numberOfLearners:
        self.selectedLearner = [0]
    self.learnerChanged()
    self.table.clearSelection()

def crossValidateWithSeparateTrainingAndTesting(training, testing, learner, folds=10):
    assert len(training) == len(testing)
    indices = orange.MakeRandomIndicesCV(training, folds=folds)
    cm = orngStat.ConfusionMatrix()
    for i in range(folds):
        trainingFold = training.select(indices, i, negate=1)
        testingFold = testing.select(indices, i)
        results = orngTest.learnAndTestOnTestData([learner], trainingFold, testingFold)
        fCm = orngStat.confusionMatrices(results, classIndex=0)[0]
        cm.TP += fCm.TP
        cm.FP += fCm.FP
        cm.FN += fCm.FN
        cm.TN += fCm.TN
    return cm

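# A possible follow-up on the pooled matrix returned above. The helper only
# restates accuracy = (TP + TN) / total from the aggregated counts; it is an
# illustration, not part of the original project.
def reportPooledMatrix(cm):
    total = float(cm.TP + cm.FP + cm.FN + cm.TN)
    print "TP: %i, FP: %i, FN: %i, TN: %i" % (cm.TP, cm.FP, cm.FN, cm.TN)
    print "pooled accuracy: %.3f" % ((cm.TP + cm.TN) / total)
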
def main(): print "loading" annotations = annotation_reader.from_file( "%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME) annotator2 = annotation_reader.from_file( "%s/data/directions/breadbox/nouns_dlaude.partial.txt" % TKLIB_HOME) #histogram(annotations) print "table" table = annotations.as_orange_table() cv_indices = orange.MakeRandomIndices2(table, p0=0.5) print "indices", set(cv_indices) print "splitting" training, testing = annotation_reader.split(annotations, cv_indices) print "features" engine = WordnetParentsEngine(training) training_table = engine.makeTable(training) testing_table = engine.makeTable(testing) #training_table, testing_table = wordnet_parents(training, testing) #training_table, testing_table = wordnet_glosses(training, testing) #training_table, testing_table = flickr_parents(training, testing) print len(training_table), "training examples" print len(testing_table), "testing examples" #training_table = annotation_reader.to_big_small(training_table) #testing_table = annotation_reader.to_big_small(testing_table) #information_gain = orange.MeasureAttribute_info() #for x in training_table.domain.attributes: # print "x", information_gain(x, training_table) learners = [ orange.MajorityLearner(), orngEnsemble.RandomForestLearner(), WordnetKnnClassifier, agreement.WizardOfOzLearner(annotator2.as_orange_table()) ] results = orngTest.learnAndTestOnTestData(learners, training_table, testing_table) for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results)): print orangeUtils.confusion_matrix_to_string(table.domain, cm) print "accuracy: %.2f%%" % (accuracy * 100)
def setTestResults(self, res):
    ##scPA
    self.warning(0)
    self.error(0)
    ##ecPA
    self.res = res
    if not res:
        self.matrix = None
        self.table.setRowCount(0)
        self.table.setColumnCount(0)
        return

    self.matrix = orngStat.confusionMatrices(res, -2)
    dim = len(res.classValues)

    self.table.setRowCount(dim + 1)
    self.table.setColumnCount(dim + 1)
    self.table.setHorizontalHeaderLabels(res.classValues + [""])
    self.table.setVerticalHeaderLabels(res.classValues + [""])
    for ri in range(dim + 1):
        for ci in range(dim + 1):
            it = QTableWidgetItem()
            it.setFlags(Qt.ItemIsEnabled |
                        (ri < dim and ci < dim and Qt.ItemIsSelectable or Qt.NoItemFlags))
            it.setTextAlignment(Qt.AlignRight)
            self.table.setItem(ri, ci, it)

    boldf = self.table.item(0, dim).font()
    boldf.setBold(True)
    for ri in range(dim + 1):
        self.table.item(ri, dim).setFont(boldf)
        self.table.item(dim, ri).setFont(boldf)

    self.learnerNames = res.classifierNames[:]
    if not self.selectedLearner and self.res.numberOfLearners:
        self.selectedLearner = [0]
    self.learnerChanged()
    self.table.clearSelection()

def get_confusion_matrix(results, cutoff=0.5):
    cms = orngStat.confusionMatrices(results, cutoff=cutoff)
    return cms[0]

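# Sketch of comparing a few cutoffs with the helper above. The 'titanic' data
# set and the Bayes learner are illustrative assumptions only.
def compareCutoffs():
    import orange, orngTest
    data = orange.ExampleTable("titanic")
    results = orngTest.crossValidation([orange.BayesLearner()], data, folds=5)
    for cutoff in (0.3, 0.5, 0.7):
        cm = get_confusion_matrix(results, cutoff=cutoff)
        print "cutoff %.1f -> TP: %i, FP: %i, FN: %i, TN: %i" % \
            (cutoff, cm.TP, cm.FP, cm.FN, cm.TN)
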
#!/usr/bin/env python

import sys
import os

import orange
import orngStat
import orngTest
import orngTree

for features in sys.argv[1::]:
    data = orange.ExampleTable(features)
    learners = [orange.BayesLearner(name='bayes')]
    result = orngTest.crossValidation(learners, data)
    CAs = orngStat.CA(result)
    CMs = orngStat.confusionMatrices(result)
    print orngStat.F1(CMs[0]), CAs[0], features

def classifyContourLists(dataViewer, inputTrainingExamplesIdentifier, contourListsNodePath):
    """
    Classify contour lists

    Parameters:
        dataViewer: container for program data including contour lists
        inputTrainingExamplesIdentifier: basename for file with training data
        contourListsNodePath: identifies the node with the contour list objects.

    The classification probabilities that are calculated are stored as
    properties of the contour lists.
    """
    #identifier = 'test'
    dataFilePath = os.path.join(default_path.cytosegDataFolder,
                                inputTrainingExamplesIdentifier + ".tab")
    data = orange.ExampleTable(dataFilePath)

    #depth = 25 # BMC Bioinformatics submission used for cerebellum
    #depth = 25 # BMC Bioinformatics submission used for dentate gyrus
    depth = 25 #5 #test #25 # BMC Bioinformatics submission used for retina
    minimumExamples = len(data) / depth

    tree = orngTree.TreeLearner(storeNodeClassifier=0,
                                storeContingencies=0,
                                storeDistributions=1,
                                minExamples=minimumExamples,
                                ).instance()
    gini = orange.MeasureAttribute_gini()
    tree.split.discreteSplitConstructor.measure = \
        tree.split.continuousSplitConstructor.measure = gini
    tree.maxDepth = depth
    split = 3
    tree.split = orngEnsemble.SplitConstructor_AttributeSubset(tree.split, split)

    logging.info("creating random forest")
    #numTrees = 50 # for BMC Bioinformatics
    numTrees = 50 #100 #test # 50
    forest = orngEnsemble.RandomForestLearner(data, name="forest",
                                              learner=tree, trees=numTrees)
    logging.info("finished creating random forest")

    print dataFilePath
    print "number of examples:", len(data)
    print "tree learner minimumExamples parameter:", minimumExamples
    print "depth:", depth
    print "split:", split
    print "number of trees:", numTrees

    count = 0

    print "data.domain.attributes", data.domain.attributes, len(data.domain.attributes)
    print "data.domain.variables", data.domain.variables, len(data.domain.variables)
    print "Possible classes:", data.domain.classVar.values
    if len(data.domain.classVar.values) == 1:
        print '<ERROR ID=NO_VALID_TRAINING_CONTOURS_WERE_DETECTED TEXT="Contour extraction was performed on the training data but none of the contours detected fully overlap with training contours given.">'
        raise Exception("There is only one class in the test data.")

    # optionally, calculate accuracy on the training data
    calculateAccuracyOnTrainingData = False
    if calculateAccuracyOnTrainingData:
        import matplotlib.pyplot as pyplot

        #voting = orange.ExampleTable("voting3")
        print "Training Data Accuracy:"
        learners = [orngEnsemble.RandomForestLearner(name="test_forest",
                                                     learner=tree, trees=50)]
        # not correct:
        # learners = [forest]
        results = orngTest.crossValidation(learners, data, folds=5)

        falsePositiveRates = []
        truePositiveRates = []
        for cutoffInteger in range(1, 20, 1):
            cutoff = float(cutoffInteger) * 0.05
            #cm = orngStat.confusionMatrices(results, cutoff=cutoff, classIndex=1)[0]
            cm = orngStat.confusionMatrices(results, cutoff=cutoff)[0]
            #cm = orngStat.confusionMatrices(results)[0]
            print "cutoff %f:" % cutoff
            print "TP: %i, FP: %i" % (cm.TP, cm.FP)
            print dir(cm)
            falsePositiveRates.append(array(cm.FP, dtype=float) / float(cm.TN + cm.FP))
            print "negatives (cm.TN + cm.FP):", cm.TN + cm.FP
            truePositiveRates.append(array(cm.TP, dtype=float) / float(cm.FN + cm.TP))
            print "positives (cm.FN + cm.TP):", cm.FN + cm.TP
            print "truePositiveRate:", float(cm.TP) / float(cm.FN + cm.TP)

        pyplot.hold(True)
        pyplot.plot(falsePositiveRates, truePositiveRates)
        pyplot.plot(falsePositiveRates, truePositiveRates, 'bo')
        pyplot.hold(False)
        print "falsePositiveRates = ", falsePositiveRates
        print "truePositiveRates = ", truePositiveRates
        #pyplot.title(target)
        pyplot.xlabel('False Positive Rate')
        pyplot.ylabel('True Positive Rate')
        pyplot.grid(True)
        #pyplot.axis([0, 1, 0, 1])
        pyplot.axis([0, falsePositiveRates[0], 0.7, 1])
        if 0:
            pyplot.show()

    print "loading contour lists"
    contourListsNode =\
        dataViewer.mainDoc.dataTree.getSubtree(contourListsNodePath)

    logging.info("classifying contour sets")
    print contourListsNodePath
    print "setting contour list probabilities"
    print "number of contour lists:", len(contourListsNode.children)

    for contourListNode in contourListsNode.children:
        if count % 100 == 0:
            print "contour list number:", count
        dictionary = getContourListFeatures(contourListNode)
        list = []
        for item in dictionary.items():
            value = item[1]
            list.append(value)
        list.append('0')
        example = orange.Example(data.domain, list)
        p = forest(example, orange.GetProbabilities)
        # todo: this should be checked once immediately after the training data
        # file is read rather than checked here
        if len(p) == 1:
            raise Exception("There is only one class in the data. There should be two classes like true and false.")
        contourListNode.object.setProbability(p[1])
        print "classifying, contour list probability:", p[1]
        colorScaleFactor = 5.0
        if 0:
            contourListNode.object.setColor([200 - ((colorScaleFactor * p[1]) * 200),
                                             (colorScaleFactor * p[1]) * 200,
                                             70])
        count += 1

    logging.info("finished classifying contour sets")

print "%s\t%5.3f\t%5.3f\t%5.3f\t%6.3f" % (learners[l].name, CAs[l], APs[l], Briers[l], ISs[l]) CAs = orngStat.CA(res, reportSE=True) APs = orngStat.AP(res, reportSE=True) Briers = orngStat.BrierScore(res, reportSE=True) ISs = orngStat.IS(res, reportSE=True) print print "method\tCA\tAP\tBrier\tIS" for l in range(len(learners)): print "%s\t%5.3f+-%5.3f\t%5.3f+-%5.3f\t%5.3f+-%5.3f\t%6.3f+-%5.3f" % ((learners[l].name, ) + CAs[l] + APs[l] + Briers[l] + ISs[l]) print cm = orngStat.confusionMatrices(res)[0] print "Confusion matrix for naive Bayes:" print "TP: %i, FP: %i, FN: %s, TN: %i" % (cm.TP, cm.FP, cm.FN, cm.TN) print cm = orngStat.confusionMatrices(res, cutoff=0.2)[0] print "Confusion matrix for naive Bayes:" print "TP: %i, FP: %i, FN: %s, TN: %i" % (cm.TP, cm.FP, cm.FN, cm.TN) print cm = orngStat.confusionMatrices(resVeh, vehicle.domain.classVar.values.index("van"))[0] print "Confusion matrix for naive Bayes for 'van':" print "TP: %i, FP: %i, FN: %s, TN: %i" % (cm.TP, cm.FP, cm.FN, cm.TN) print cm = orngStat.confusionMatrices(resVeh, vehicle.domain.classVar.values.index("opel"))[0]
import orange, orngWrap, orngTest, orngStat

data = orange.ExampleTable("bupa")
ri2 = orange.MakeRandomIndices2(data, 0.7)
train = data.select(ri2, 0)
test = data.select(ri2, 1)

bayes = orange.BayesLearner(train)

thresholds = [.2, .5, .8]
models = [orngWrap.ThresholdClassifier(bayes, thr) for thr in thresholds]

res = orngTest.testOnData(models, test)
cm = orngStat.confusionMatrices(res)

print
for i, thr in enumerate(thresholds):
    print "%1.2f: TP %5.3f, TN %5.3f" % (thr, cm[i].TP, cm[i].TN)

def nway():
    engine_to_examples = {}
    trainer = Trainer()
    classes = set()
    for i, key in enumerate(trainer.annotationEngines):
        engine = trainer.engineMap[key]
        table = trainer.makeTable(engine)
        for ex in table:
            if ex["farAway"].value:
                cls = "null"
            else:
                cls = ex["sourceEngineName"].value
            geometry = ex["geometry"].value
            engine_to_examples.setdefault(cls, [])
            classes.add(cls)
            examples = [trainer.engineMap[key].makeExample(expectInsane=True, **geometry)
                        for key in trainer.annotationEngines
                        if not len(geometry["figure"]) == 0]
            engine_to_examples[cls].append(examples)
        if i >= 1:
            #break
            pass

    variables = []
    for ex in examples:
        for attr in ex.domain:
            if attr.name == "class":
                continue
            new_attr = orange.FloatVariable(attr.name)
            variables.append(new_attr)

    domain = orange.Domain(variables,
                           orange.EnumVariable("class", values=list(classes)))
    table = orange.ExampleTable(domain)
    for engine_name, example_lists in engine_to_examples.iteritems():
        for example_list in example_lists:
            ex = orange.Example(domain)
            for engine_ex in example_list:
                for attr in engine_ex.domain:
                    ex[attr.name] = engine_ex[attr.name]
            ex["class"] = engine_name
            table.append(ex)

    print "domain", domain
    cv_indices = orange.MakeRandomIndices2(table, p0=0.75)
    training = table.select(cv_indices, 0, negate=True)
    testing = table.select(cv_indices, 0, negate=False)

    #classifier = orngBayes.BayesLearner(training)
    classifier = orangePickle.PickleableClassifier(training, orngBayes.BayesLearner)

    results = orngTest.testOnData([classifier], testing)
    print orngStat.CA(results)
    cm = orngStat.confusionMatrices(results)[0]
    classes = list(domain.classVar.values)

    print " ", " ".join([c.rjust(12) for c in classes + ["", ""]])
    for className, classConfusions in zip(classes, cm):
        #format = ("%s" + ("\t%i" * len(classes)))
        values = (className, ) + tuple(classConfusions)
        print " ".join([str(c).rjust(12) for c in values])
        #print format % values

    for name in classes:
        classIndex = classes.index(name)
        mpl.figure()
        rocCurve(results, "", classIndex, stepSize=0.001,
                 plotArgs=dict(linewidth=5, markersize=10))
        mpl.title(name, size=30)
        mpl.xlabel("FP", fontsize=30)
        mpl.ylabel("TP", fontsize=30)
        mpl.xticks([0, 1], fontsize=17)
        mpl.yticks([0, 1], fontsize=17)

    fname = "nway.pck"
    print "saving", fname
    with open(fname, "w") as f:
        pickle.dump(classifier, f, protocol=2)

    mpl.show()

gini = orange.MeasureAttribute_gini()
tree.split.discreteSplitConstructor.measure = \
    tree.split.continuousSplitConstructor.measure = gini
tree.split = orngEnsemble.SplitConstructor_AttributeSubset(tree.split, 3)

#forest = orngEnsemble.RandomForestLearner(data, trees=50,
#                                          name="forest", learner=tree)

learners = [orngEnsemble.RandomForestLearner(name="random_forest", trees=10, learner=tree),
            orngEnsemble.RandomForestLearner(name="random_forest", trees=10, learner=tree)]

voting = orange.ExampleTable("voting3")
res = orngTest.crossValidation(learners, voting)

import orngStat

for cutoffInteger in range(0, 10, 1):
    cutoff = float(cutoffInteger) * 0.1
    cm = orngStat.confusionMatrices(res, cutoff=cutoff)[0]
    #print type(orngStat.confusionMatrices(res, cutoff=cutoff))
    #print cm
    #print type(cm)
    #print type(cm[0])
    print "cutoff %f:" % cutoff
    print "TP: %i, FP: %i" % (cm.TP, cm.FP)