def WrapperFSS(data, learner, verbose=0, folds=10): classVar = data.domain.classVar currentAtt = [] freeAttributes = list(data.domain.attributes) newDomain = orange.Domain(currentAtt + [classVar]) d = data.select(newDomain) results = orngTest.crossValidation([learner], d, folds=folds) maxStat = orngStat.CA(results)[0] if verbose>=2: print "start (%5.3f)" % maxStat while 1: stat = [] for a in freeAttributes: newDomain = orange.Domain([a] + currentAtt + [classVar]) d = data.select(newDomain) results = orngTest.crossValidation([learner], d, folds=folds) stat.append(orngStat.CA(results)[0]) if verbose>=2: print " %s gained %5.3f" % (a.name, orngStat.CA(results)[0]) if (max(stat) > maxStat): oldMaxStat = maxStat maxStat = max(stat) bestVarIndx = stat.index(max(stat)) if verbose: print "gain: %5.3f, attribute: %s" % (maxStat-oldMaxStat, freeAttributes[bestVarIndx].name) currentAtt = currentAtt + [freeAttributes[bestVarIndx]] del freeAttributes[bestVarIndx] else: if verbose: print "stopped (%5.3f)" % (max(stat) - maxStat) return orange.Domain(currentAtt + [classVar]) break
def main():
    """Train and evaluate noun classifiers on the breadbox annotations.

    Loads annotations from a fixed data file, splits them 50/50 into
    training/testing, builds pairwise features, then compares a
    majority-class baseline against a random forest on the held-out
    half, printing a confusion matrix and accuracy for each learner.
    """
    print "loading"
    # NOTE(review): assumes TKLIB_HOME points at the project root -- confirm.
    annotations = annotation_reader.from_file("%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME)
    table = annotations.as_orange_table()
    # p0=0.5 produces an even train/test split.
    cv_indices = orange.MakeRandomIndices2(table, p0=0.5)
    print "indices", set(cv_indices)
    print "splitting"
    training, testing = annotation_reader.split(annotations, cv_indices)
    print "features"
    engine = PairwiseEngine(training)
    training_table = engine.training_table
    testing_table = engine.makeTable(testing)
    print len(training_table), "training"
    print len(testing_table), "testing"
    # MajorityLearner serves as the baseline for the random forest.
    learners = [orange.MajorityLearner(),
                orngEnsemble.RandomForestLearner(),
                ]
    results = orngTest.learnAndTestOnTestData(learners, training_table, testing_table)
    # One (accuracy, confusion matrix) pair per learner, in order.
    for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results)):
        print orangeUtils.confusion_matrix_to_string(table.domain, cm)
        print "accuracy: %.2f%%" % (accuracy*100)
def test_classifier(model, data):
    """Evaluate one trained classifier on a test table.

    Returns a (classification_accuracy, results) pair, where `results`
    is the raw orngTest evaluation object.
    """
    # orngTest.testOnData wants a sequence of classifiers, so the
    # single model is wrapped in a one-element tuple.
    evaluation = orngTest.testOnData((model,), data)
    accuracy = orngStat.CA(evaluation)[0]
    return accuracy, evaluation
def test_fss(learner, data, t=0.01): fss = orngFSS.FilterAttsAboveThresh(threshold=t) fLearner = orngFSS.FilteredLearner(learner, filter=fss, name='%s & fss' % (learner.name)) learners = [learner, fLearner] results = orngTest.crossValidation(learners, data, folds=10, storeClassifiers=1) # how many attributes did each classifier use? natt = [0.] * len(learners) for fold in range(results.numberOfIterations): for lrn in range(len(learners)): natt[lrn] += len(results.classifiers[fold][lrn].domain.attributes) for lrn in range(len(learners)): natt[lrn] = natt[lrn] / 10. print "\nLearner Accuracy #Atts" for i in range(len(learners)): print "%-15s %5.3f %5.2f" % (learners[i].name, orngStat.CA(results)[i], natt[i]) # which attributes were used in filtered case? print '\nAttribute usage (how many folds attribute was used):' used = {} for fold in range(results.numberOfIterations): for att in results.classifiers[fold][1].domain.attributes: a = att.name if a in used.keys(): used[a] += 1 else: used[a] = 1 for a in used.keys(): print '%2d x %s' % (used[a], a)
def print_results(learners, results): # output the results print "Learner CA IS Brier AUC" for i in range(len(learners)): print "%-8s %5.4f %5.3f %5.3f %5.3f" % ( learners[i].name, orngStat.CA(results)[i], orngStat.IS(results)[i], orngStat.BrierScore(results)[i], orngStat.AUC(results)[i])
def cforange_classification_accuracy(input_dict):
    """Compute classification accuracy from evaluation results.

    input_dict: dict with key 'results' holding orngTest results.
    Returns {'ca': score}: a single value when exactly one learner was
    evaluated, otherwise the full list of per-learner accuracies.
    """
    import orngStat
    scores = orngStat.CA(input_dict['results'])
    # Unwrap the list for the common single-learner case.
    if len(scores) == 1:
        scores = scores[0]
    return {'ca': scores}
def CA(res=None):
    """Classification accuracy, truncated to 3 decimals.

    Called with no argument it returns the metadata dict the optimizer
    uses to identify the task type; otherwise it returns the list of
    per-learner accuracies from orngStat.CA, each rounded to 3
    decimals.
    """
    # `is None` is the idiomatic identity test (the original used
    # `== None`, which invokes __eq__ and can misbehave).
    if res is None:
        return {"type": CLASSIFICATION}
    scores = orngStat.CA(res)
    return [round(x, 3) for x in scores]
def get_stats(results):
    """Summarize evaluation results as a small stats dict.

    Returns 'Accuracy' (classification accuracy of the first learner)
    plus 'Sensitivity' and 'Specificity' computed from the confusion
    matrix.
    """
    cm = get_confusion_matrix(results)
    return {
        'Accuracy': orngStat.CA(results)[0],
        'Sensitivity': orngStat.sens(cm),
        'Specificity': orngStat.spec(cm),
    }
def displayResults(results): for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results, classIndex=0)): print "accuracy", accuracy print " TP: %i, FP: %i, FN: %s, TN: %i" % (cm.TP, cm.FP, cm.FN, cm.TN) print "precision", orngStat.precision(cm) print "recall", orngStat.recall(cm) print "f1", fScore(cm) print
def main(phase, make):
    """Run a feature-extraction / classification experiment.

    phase == 4: build features with FeatureExtractor2/3, train a
    decision tree, optionally (make) retrain on the full table and
    write dev/test predictions to files.
    Any other phase: 10-fold cross-validation on a single feature
    table, printing per-example guesses and overall accuracy.
    """
    if (phase == 4):
        f = FeatureExtractor2.FeatureExtractor(createFile=make)
        ft = FeatureExtractor3.FeatureExtractor(createFile=make)
        idlist = f.IDs
        idlist2 = ft.IDs
        FeatureTable = orange.ExampleTable("table2")
        TestTable = orange.ExampleTable("table3")
        # Train on one half, evaluate on the other.
        training, test = SplitDataInHalf(FeatureTable, f.size)
        learner = orngTree.TreeLearner(training)
        res = orngTest.testOnData([learner], test)
        if make == True:
            # Retrain on the full table and dump predictions for both
            # the dev (FeatureTable) and test (TestTable) sets.
            learner = orngTree.TreeLearner(FeatureTable)
            res = orngTest.testOnData([learner], TestTable)
            res2 = orngTest.testOnData([learner], FeatureTable)
            WriteToFile("dev_tonder_olsen.txt", res2, idlist)
            WriteToFile("test_tonder_olsen.txt", res, idlist2)
        # NOTE(review): indentation reconstructed from a collapsed
        # source line -- presumably printresult() runs for both the
        # make and non-make paths; confirm against the original file.
        printresult()
    else:
        f = featureExtractor.FeatureExtractor(createFile=True)
        FeatureTable = orange.ExampleTable("table")
        learner, res = CrossValidation(FeatureTable, f.size, 10)
        # Tallies: guess* count correct predictions per class,
        # correct* count the actual class distribution.
        guessyes = 0
        guessno = 0
        correctyes = 0
        correctno = 0
        for r in res.results:
            if str(r.classes[0]) == "1":
                prtres = "Yes"
            else:
                prtres = "No"
            if str(r.actualClass) == "1":
                prttrue = "Yes"
                correctyes = correctyes + 1
            else:
                prttrue = "No"
                correctno = correctno + 1
            #print str(r.classes[0]) + " vs correct: " + str(r.actualClass)
            if prtres == "No" and prttrue == "No":
                guessno = guessno + 1
            elif prtres == "Yes" and prttrue == "Yes":
                guessyes = guessyes + 1
            print "Guessed " + prtres + " and the correct answer was: " + prttrue
        #res = orngTest.leaveOneOut([learner],FeatureTable)
        #printresult = orngStat.CA(res, orngStat.IS(res))
        #print "Yes Accuracy: " + str(float(guessyes)/float(correctyes))
        #print "No Accuracy: " + str(float(guessno)/float(correctno))
        # Shadows the module-level printresult() used in the phase-4
        # branch with a list of accuracies.
        printresult = orngStat.CA(res)
        print "Accuracy: " + str(printresult[0])
def cforange_classification_accuracy(input_dict):
    """Widget wrapper: classification accuracy, optionally with SE.

    input_dict keys:
      'results'  -- orngTest evaluation results.
      'reportSE' -- the string 'true' to also report the standard
                    error; any other value disables it.

    Returns {'ca': score}: a single value when exactly one learner was
    evaluated, otherwise the full list of per-learner scores.
    """
    import orngStat
    results = input_dict['results']
    # Widget inputs arrive as strings; only the literal 'true' enables
    # SE reporting.  Direct comparison replaces the original 4-line
    # if/else that set a boolean.
    reportSE = input_dict['reportSE'] == 'true'
    CAs = orngStat.CA(results, reportSE=reportSE)
    if len(CAs) == 1:
        CAs = CAs[0]
    return {'ca': CAs}