Beispiel #1
0
def WrapperFSS(data, learner, verbose=0, folds=10):
    """Greedy forward attribute selection wrapped around `learner`.

    Starts from a domain holding only the class variable and, in each round,
    scores every remaining attribute by the cross-validated classification
    accuracy obtained when it is added to the current selection.  The best
    attribute is kept as long as it strictly improves on the best accuracy
    seen so far; otherwise the search stops.

    Args:
        data: example table exposing `domain.classVar`, `domain.attributes`
            and `select(domain)`.
        learner: learner handed to `orngTest.crossValidation`.
        verbose: 0 prints nothing, any non-zero value reports accepted
            attributes and the stop, >= 2 also reports every candidate score.
        folds: number of folds passed to `orngTest.crossValidation`.

    Returns:
        An `orange.Domain` built from the selected attributes followed by
        the class variable.
    """
    classVar = data.domain.classVar
    currentAtt = []
    freeAttributes = list(data.domain.attributes)

    def accuracy(attributes):
        # Cross-validated CA of `learner` on `data` restricted to `attributes`.
        domain = orange.Domain(attributes + [classVar])
        results = orngTest.crossValidation(
            [learner], data.select(domain), folds=folds)
        return orngStat.CA(results)[0]

    maxStat = accuracy(currentAtt)
    if verbose >= 2:
        print("start (%5.3f)" % maxStat)

    # Looping only while candidates remain avoids max() on an empty list,
    # which previously raised ValueError once every attribute had been
    # accepted (or when the data had no attributes at all).
    while freeAttributes:
        stat = []
        for a in freeAttributes:
            score = accuracy([a] + currentAtt)
            stat.append(score)
            if verbose >= 2:
                print("  %s gained %5.3f" % (a.name, score))

        bestStat = max(stat)
        if bestStat > maxStat:
            bestVarIndx = stat.index(bestStat)
            if verbose:
                print("gain: %5.3f, attribute: %s"
                      % (bestStat - maxStat, freeAttributes[bestVarIndx].name))
            maxStat = bestStat
            currentAtt = currentAtt + [freeAttributes[bestVarIndx]]
            del freeAttributes[bestVarIndx]
        else:
            if verbose:
                print("stopped (%5.3f)" % (bestStat - maxStat))
            break
    else:
        # Reached only when the candidate list ran out without a `break`.
        if verbose:
            print("stopped (no attributes left)")

    return orange.Domain(currentAtt + [classVar])
Beispiel #2
0
def main():
    """Train and evaluate two learners on a random half/half split.

    Loads the annotation file under TKLIB_HOME, splits it with
    `orange.MakeRandomIndices2(p0=0.5)`, builds training and testing tables
    through `PairwiseEngine`, and prints a confusion matrix and the accuracy
    for a majority learner and a random forest.
    """
    print("loading")
    source_path = "%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME
    annotations = annotation_reader.from_file(source_path)
    table = annotations.as_orange_table()
    cv_indices = orange.MakeRandomIndices2(table, p0=0.5)
    print("indices %s" % (set(cv_indices),))

    print("splitting")
    training, testing = annotation_reader.split(annotations, cv_indices)

    print("features")
    engine = PairwiseEngine(training)
    training_table = engine.training_table
    testing_table = engine.makeTable(testing)
    print("%s training" % (len(training_table),))
    print("%s testing" % (len(testing_table),))

    learners = [
        orange.MajorityLearner(),
        orngEnsemble.RandomForestLearner(),
    ]
    results = orngTest.learnAndTestOnTestData(
        learners, training_table, testing_table)

    # One accuracy and one confusion matrix per learner, in learner order.
    accuracies = orngStat.CA(results)
    matrices = orngStat.confusionMatrices(results)
    for accuracy, cm in zip(accuracies, matrices):
        print(orangeUtils.confusion_matrix_to_string(table.domain, cm))
        print("accuracy: %.2f%%" % (accuracy * 100))
Beispiel #3
0
def test_classifier(model, data):
    """Evaluate a single model on `data`.

    Returns:
        A `(class_accuracy, res)` pair: the first classification-accuracy
        score reported by `orngStat.CA` and the raw result object returned
        by `orngTest.testOnData`.
    """
    # testOnData expects a sequence of models, hence the 1-tuple.
    models = (model, )
    res = orngTest.testOnData(models, data)
    return orngStat.CA(res)[0], res
Beispiel #4
0
Datei: cv.py Projekt: sloria/usv
def test_fss(learner, data, t=0.01, folds=10):
    """Compare `learner` with and without threshold-based attribute filtering.

    Cross-validates the plain learner and a `orngFSS.FilteredLearner` built
    on top of it, then prints each learner's accuracy, the average number of
    attributes its per-fold classifiers used, and how many folds each
    attribute appeared in for the filtered learner.

    Args:
        learner: base learner; its `name` is used in the printed report.
        data: examples handed to `orngTest.crossValidation`.
        t: threshold passed to `orngFSS.FilterAttsAboveThresh`.
        folds: number of cross-validation folds (previously fixed at 10).
    """
    fss = orngFSS.FilterAttsAboveThresh(threshold=t)
    fLearner = orngFSS.FilteredLearner(learner,
                                       filter=fss,
                                       name='%s & fss' % (learner.name))
    learners = [learner, fLearner]
    results = orngTest.crossValidation(learners,
                                       data,
                                       folds=folds,
                                       storeClassifiers=1)
    iterations = results.numberOfIterations

    # how many attributes did each classifier use?
    natt = [0.] * len(learners)
    for fold in range(iterations):
        for lrn in range(len(learners)):
            natt[lrn] += len(results.classifiers[fold][lrn].domain.attributes)
    # Average over the folds actually run rather than a hard-coded 10.
    natt = [total / iterations for total in natt]

    # Score once; CA returns one value per learner.
    accuracies = orngStat.CA(results)
    print("\nLearner         Accuracy  #Atts")
    for i in range(len(learners)):
        print("%-15s %5.3f     %5.2f" % (learners[i].name,
                                         accuracies[i], natt[i]))

    # which attributes were used in filtered case?
    print('\nAttribute usage (how many folds attribute was used):')
    used = {}
    for fold in range(iterations):
        # Index 1 is the filtered learner (see `learners` above).
        for att in results.classifiers[fold][1].domain.attributes:
            used[att.name] = used.get(att.name, 0) + 1
    for a in used:
        print('%2d x %s' % (used[a], a))
Beispiel #5
0
def print_results(learners, results):
    """Print a table of CA, IS, Brier score and AUC, one row per learner.

    Args:
        learners: sequence of objects with a `name` attribute, in the same
            order as the scores inside `results`.
        results: evaluation results accepted by the `orngStat` scoring
            functions.
    """
    # output the results
    print("Learner     CA    IS     Brier  AUC")

    # Each scoring call evaluates all learners at once, so run it a single
    # time instead of once per printed row.
    ca = orngStat.CA(results)
    info = orngStat.IS(results)
    brier = orngStat.BrierScore(results)
    auc = orngStat.AUC(results)

    for i in range(len(learners)):
        print("%-8s  %5.4f  %5.3f  %5.3f  %5.3f" % (
            learners[i].name, ca[i], info[i], brier[i], auc[i]))
Beispiel #6
0
def cforange_classification_accuracy(input_dict):
    """Compute classification accuracy for `input_dict['results']`.

    Returns:
        A dict with the single key 'ca'.  Its value is the sequence returned
        by `orngStat.CA`, or that sequence's only element when it has
        length 1.
    """
    import orngStat
    scores = orngStat.CA(input_dict['results'])
    # A lone score is unwrapped so callers receive a scalar.
    ca = scores[0] if len(scores) == 1 else scores
    return {'ca': ca}
Beispiel #7
0
def CA(res=None):
    """Return `orngStat.CA` scores rounded to 3 decimals.

    Called without arguments (as the optimizer does) it returns a
    descriptor dict, `{"type": CLASSIFICATION}`, instead of scores.

    Args:
        res: evaluation results accepted by `orngStat.CA`, or None.

    Returns:
        The descriptor dict when `res` is None, otherwise a list with each
        score rounded to 3 decimal places.
    """
    # Identity test: `== None` would go through any custom __eq__ defined on
    # the results object.
    if res is None:
        return {"type": CLASSIFICATION}
    return [round(x, 3) for x in orngStat.CA(res)]
Beispiel #8
0
def get_stats(results):
    """Summarise `results` as accuracy, sensitivity and specificity.

    Returns:
        A dict with keys 'Accuracy' (first `orngStat.CA` score),
        'Sensitivity' and 'Specificity' (both computed from the confusion
        matrix produced by `get_confusion_matrix`).
    """
    cm = get_confusion_matrix(results)
    return {
        'Accuracy': orngStat.CA(results)[0],
        'Sensitivity': orngStat.sens(cm),
        'Specificity': orngStat.spec(cm),
    }
Beispiel #9
0
def displayResults(results):
    """Print accuracy, confusion counts, precision, recall and F1.

    One group of lines is printed per entry produced by pairing
    `orngStat.CA(results)` with
    `orngStat.confusionMatrices(results, classIndex=0)`, followed by a blank
    line.
    """
    accuracies = orngStat.CA(results)
    matrices = orngStat.confusionMatrices(results, classIndex=0)
    for accuracy, cm in zip(accuracies, matrices):
        print("accuracy %s" % (accuracy,))
        # FN used "%s" while the other three counts used "%i", so it could
        # render differently from its neighbours; all four now share "%i".
        print(" TP: %i, FP: %i, FN: %i, TN: %i" % (cm.TP, cm.FP, cm.FN, cm.TN))
        print("precision %s" % (orngStat.precision(cm),))
        print("recall %s" % (orngStat.recall(cm),))
        print("f1 %s" % (fScore(cm),))
        print("")
def main(phase, make):
    """Run the tree-learner experiment for the given phase.

    Phase 4 trains on one half of "table2" and tests on the other; when
    `make` equals True it additionally retrains on all of "table2", writes
    predictions for "table2" and "table3" to the two result files and calls
    the module-level `printresult()`.

    Any other phase cross-validates on "table", prints the guessed and the
    actual answer for every tested example, and prints the overall accuracy.

    Args:
        phase: experiment phase; only the value 4 selects the first branch.
        make: forwarded as `createFile` to the phase-4 feature extractors and,
            when equal to True, enables writing the result files.
    """
    if phase == 4:
        f = FeatureExtractor2.FeatureExtractor(createFile=make)
        ft = FeatureExtractor3.FeatureExtractor(createFile=make)
        idlist = f.IDs
        idlist2 = ft.IDs
        FeatureTable = orange.ExampleTable("table2")
        TestTable = orange.ExampleTable("table3")
        training, test = SplitDataInHalf(FeatureTable, f.size)
        learner = orngTree.TreeLearner(training)
        res = orngTest.testOnData([learner], test)
        if make == True:
            learner = orngTree.TreeLearner(FeatureTable)
            res = orngTest.testOnData([learner], TestTable)
            res2 = orngTest.testOnData([learner], FeatureTable)
            WriteToFile("dev_tonder_olsen.txt", res2, idlist)
            WriteToFile("test_tonder_olsen.txt", res, idlist2)
            # No local in this function may be called `printresult`:
            # assigning to that name anywhere in main() would make it local
            # and turn this call into an UnboundLocalError.
            printresult()
    else:
        f = featureExtractor.FeatureExtractor(createFile=True)
        FeatureTable = orange.ExampleTable("table")
        _learner, res = CrossValidation(FeatureTable, f.size, 10)

        for r in res.results:
            # Class value "1" is reported as "Yes", anything else as "No".
            prtres = "Yes" if str(r.classes[0]) == "1" else "No"
            prttrue = "Yes" if str(r.actualClass) == "1" else "No"
            print("Guessed " + prtres + " and the correct answer was: " + prttrue)

        accuracies = orngStat.CA(res)
        print("Accuracy: " + str(accuracies[0]))
Beispiel #11
0
def cforange_classification_accuracy(input_dict):
    """Compute classification accuracy, optionally with standard errors.

    Reads `input_dict['results']` and `input_dict['reportSE']`; the latter
    enables `reportSE` only when it equals the string 'true'.

    Returns:
        A dict with the single key 'ca'.  Its value is the sequence returned
        by `orngStat.CA`, or that sequence's only element when it has
        length 1.
    """
    import orngStat
    results = input_dict['results']
    # The flag arrives as text; only the exact string 'true' turns it on.
    report_se = input_dict['reportSE'] == 'true'
    scores = orngStat.CA(results, reportSE=report_se)
    ca = scores[0] if len(scores) == 1 else scores
    return {'ca': ca}