Exemple #1
0
def getAverageClassificaionRate(dataset,
                                runs=20,
                                testSize=200,
                                setFunc=setEntropy,
                                infoFunc=infoGain):
    """
    Estimate the classification rate by repeated random hold-out testing.

    For each run, testSize examples are drawn at random (without
    replacement) from a copy of the examples, a tree is built from the
    remaining examples with makeTree, and the first element of
    evaluateTree's result on the held-out examples is recorded as that
    run's score.

    dataset  -- tuple (examples, attrValues, labelName, labelValues)
    runs     -- number of random train/test splits to evaluate
    testSize -- number of examples held out for testing in each run
    setFunc, infoFunc -- passed through unchanged to makeTree

    Returns (scores, average): the list of per-run scores and their mean
    (0.0 when no run was performed).
    Raises ValueError if testSize exceeds the number of examples.
    """
    examples, attrValues, labelName, labelValues = dataset
    total = len(examples)
    # Fail early with a clear message instead of letting random.randint
    # raise an opaque "empty range" ValueError in the middle of a run.
    if testSize > total:
        raise ValueError(
            'testSize (%d) exceeds the number of examples (%d)'
            % (testSize, total))
    print('Starting test for average error for %d runs with test size %d' % (
        runs, testSize))
    scores = []
    for r in xrange(runs):
        # Work on a copy so the caller's example list is never modified.
        runExamples = examples[:]
        test = []
        for i in xrange(testSize):
            # randint is inclusive on both ends, hence the "- 1".
            pick = random.randint(0, len(runExamples) - 1)
            test.append(runExamples.pop(pick))
        tree = makeTree(runExamples, attrValues, labelName, setFunc, infoFunc)
        score = evaluateTree(tree, test, labelName)[0]
        print('Score for run %d is %f' % (r + 1, score))
        scores.append(score)
    # Guard against division by zero when no run was executed.
    if scores:
        average = sum(scores) / float(len(scores))
    else:
        average = 0.0
    print('Average classification rate over all runs: %f' % (average))
    return (scores, average)
Exemple #2
0
def testDummySet2(setFunc=setEntropy, infoFunc=infoGain):
    """Train a tree on dummy dataset 2 and evaluate it on a second load.

    The expected classification rate is 0.55.

    The tree is built from getDummyDataset2() and evaluated against the
    examples returned by getDummyDataset2(test=True).

    Returns (tree, evaluation), where evaluation is evaluateTree's result.
    """
    trainExamples, attrValues, labelName, labelValues = getDummyDataset2()
    print('Testing dummy dataset 2. Number of examples %d.'
          % len(trainExamples))
    tree = makeTree(trainExamples, attrValues, labelName, setFunc, infoFunc)
    print('Tree is as follows:\n%s\n' % str(tree))
    print('Tree size: %d.\n' % tree.count())

    # The label name used for evaluation is the one from the second load.
    evalExamples, attrValues, labelName, labelValues = getDummyDataset2(
        test=True)
    evaluation = evaluateTree(tree, evalExamples, labelName)
    # NOTE(review): the message says "training set" although the examples
    # come from getDummyDataset2(test=True) -- confirm the intended wording.
    print('Results for training set:\n%s\n' % str(evaluation))
    printDemarcation()
    return (tree, evaluation)
Exemple #3
0
def testDummySet2(setFunc = setEntropy, infoFunc = infoGain):
    """Build a decision tree from dummy dataset 2, then score it.

    The expected classification rate is 0.55.

    Training data comes from getDummyDataset2(); the data used for scoring
    comes from getDummyDataset2(test=True).

    Returns the pair (tree, evaluation).
    """
    # --- training phase ---
    trainingData = getDummyDataset2()
    examples, attrValues, labelName, labelValues = trainingData
    exampleCount = len(examples)
    print('Testing dummy dataset 2. Number of examples %d.' % exampleCount)
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    print('Tree is as follows:\n%s\n' % str(tree))
    print('Tree size: %d.\n' % tree.count())

    # --- evaluation phase (all four names are rebound from the second load) ---
    evaluationData = getDummyDataset2(test=True)
    examples, attrValues, labelName, labelValues = evaluationData
    evaluation = evaluateTree(tree, examples, labelName)
    # NOTE(review): output is labelled "training set" but the data was loaded
    # with test=True -- confirm whether the label is intentional.
    print('Results for training set:\n%s\n' % str(evaluation))
    printDemarcation()
    return (tree, evaluation)
Exemple #4
0
def testConnect4(setFunc = setEntropy, infoFunc = infoGain):
    """Build a tree for the Connect4 dataset and estimate its accuracy.

    The expected average classification rate is about 0.75.

    The tree built from the full dataset is written to 'connect4.out' in
    the current directory.  The accuracy estimate comes from
    getAverageClassificaionRate with 10 runs and a test size of 2000,
    using the same setFunc and infoFunc as the tree returned here.

    Returns (tree, evaluation), where evaluation is the value returned by
    getAverageClassificaionRate.
    """
    examples, attrValues, labelName, labelValues = getConnect4Dataset()
    print('Testing Connect4 dataset. Number of examples %d.' % len(examples))
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    print('Tree size: %d.\n' % tree.count())
    # The with-block closes the file even if serialising the tree fails.
    with open('connect4.out', 'w') as f:
        f.write(str(tree))
    print('Entire tree written out to connect4.out in local directory\n')
    # Forward the caller's functions so the averaged runs are built the same
    # way as the tree above; previously the defaults were always used here.
    evaluation = getAverageClassificaionRate(
        (examples, attrValues, labelName, labelValues),
        runs=10, testSize=2000, setFunc=setFunc, infoFunc=infoFunc)
    printDemarcation()
    return (tree, evaluation)
Exemple #5
0
def testConnect4(setFunc = setEntropy, infoFunc = infoGain):
    """Build a tree for the Connect4 dataset, estimate accuracy, and report.

    The expected average classification rate is about 0.75.

    The tree built from the full dataset is written to 'connect4.out' in
    the current directory.  The accuracy estimate comes from
    getAverageClassificaionRate with 10 runs and a test size of 2000,
    using the same setFunc and infoFunc as the tree returned here; the
    result is printed before returning.

    Returns (tree, evaluation), where evaluation is the value returned by
    getAverageClassificaionRate.
    """
    examples, attrValues, labelName, labelValues = getConnect4Dataset()
    print('Testing Connect4 dataset. Number of examples %d.' % len(examples))
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    print('Tree size: %d.\n' % tree.count())
    # The with-block closes the file even if serialising the tree fails.
    with open('connect4.out', 'w') as f:
        f.write(str(tree))
    print('Entire tree written out to connect4.out in local directory\n')
    # Forward the caller's functions so the averaged runs are built the same
    # way as the tree above; previously the defaults were always used here.
    evaluation = getAverageClassificaionRate(
        (examples, attrValues, labelName, labelValues),
        runs=10, testSize=2000, setFunc=setFunc, infoFunc=infoFunc)
    print('Results for training set:\n%s\n' % str(evaluation))
    printDemarcation()
    return (tree, evaluation)
Exemple #6
0
def testExtraCredit(setFunc=setEntropy, infoFunc=infoGain):
    """Build a tree for the extra-credit (Poker) dataset and estimate accuracy.

    The tree built from the full dataset is written to 'poker.out' in the
    current directory.  The accuracy estimate comes from
    Testing.getAverageClassificaionRate with its default run count and
    test size, using the same setFunc and infoFunc as the tree returned
    here; the result is printed before returning.

    Returns (tree, evaluation), where evaluation is the value returned by
    Testing.getAverageClassificaionRate.
    """
    examples, attrValues, labelName, labelValues = getExtraCreditDataset()
    print('Testing Poker dataset. Number of examples %d.' % len(examples))
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    # The with-block closes the file even if serialising the tree fails.
    with open('poker.out', 'w') as f:
        f.write(str(tree))
    print('Tree size: %d.\n' % tree.count())
    print('Entire tree written out to poker.out in local directory\n')
    # Forward the caller's functions so the averaged runs are built the same
    # way as the tree above; previously the defaults were always used here.
    # NOTE(review): assumes Testing.getAverageClassificaionRate accepts
    # setFunc/infoFunc keyword arguments -- confirm against the Testing module.
    evaluation = Testing.getAverageClassificaionRate(
        (examples, attrValues, labelName, labelValues),
        setFunc=setFunc, infoFunc=infoFunc)
    print('Results for training set:\n%s\n' % str(evaluation))
    Testing.printDemarcation()
    return (tree, evaluation)
def testCar(setFunc=setEntropy, infoFunc=infoGain):
    """Build a tree for the Car dataset and estimate its accuracy.

    The expected average classification rate is about 0.89.

    The tree built from the full dataset is written to 'car.out' in the
    current directory.  The accuracy estimate comes from
    getAverageClassificaionRate with its default run count and test size,
    using the same setFunc and infoFunc as the tree returned here.

    Returns (tree, evaluation), where evaluation is the value returned by
    getAverageClassificaionRate.
    """
    examples, attrValues, labelName, labelValues = getCarDataset()
    print("Testing Car dataset. Number of examples %d." % len(examples))
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    # The with-block closes the file even if serialising the tree fails.
    with open("car.out", "w") as f:
        f.write(str(tree))
    print("Tree size: %d.\n" % tree.count())
    print("Entire tree written out to car.out in local directory\n")
    # The dataset loaded above is reused; the former second call to
    # getCarDataset() stored a result that was never read.
    # setFunc/infoFunc are forwarded so the averaged runs are built the same
    # way as the tree above; previously the defaults were always used here.
    evaluation = getAverageClassificaionRate(
        (examples, attrValues, labelName, labelValues),
        setFunc=setFunc, infoFunc=infoFunc)
    printDemarcation()
    return (tree, evaluation)
Exemple #8
0
def testCar(setFunc=setEntropy, infoFunc=infoGain):
    """Build a tree for the Car dataset and estimate its accuracy.

    The expected average classification rate is about 0.89.

    The tree built from the full dataset is written to 'car.out' in the
    current directory.  The accuracy estimate comes from
    getAverageClassificaionRate with its default run count and test size,
    using the same setFunc and infoFunc as the tree returned here.

    Returns (tree, evaluation), where evaluation is the value returned by
    getAverageClassificaionRate.
    """
    examples, attrValues, labelName, labelValues = getCarDataset()
    print('Testing Car dataset. Number of examples %d.' % len(examples))
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    # The with-block closes the file even if serialising the tree fails.
    with open('car.out', 'w') as f:
        f.write(str(tree))
    print('Tree size: %d.\n' % tree.count())
    print('Entire tree written out to car.out in local directory\n')
    # The dataset loaded above is reused; the former second call to
    # getCarDataset() stored a result that was never read.
    # setFunc/infoFunc are forwarded so the averaged runs are built the same
    # way as the tree above; previously the defaults were always used here.
    evaluation = getAverageClassificaionRate(
        (examples, attrValues, labelName, labelValues),
        setFunc=setFunc, infoFunc=infoFunc)
    printDemarcation()
    return (tree, evaluation)
def testCar(setFunc = setEntropy, infoFunc = infoGain):
    """Build a tree for the Car dataset, estimate accuracy, and report.

    The expected average classification rate is about 0.95.

    The tree built from the full dataset is written to 'car.out' in the
    current directory.  The accuracy estimate comes from
    getAverageClassificaionRate with its default run count and test size,
    using the same setFunc and infoFunc as the tree returned here; the
    result is printed before returning.

    Returns (tree, evaluation), where evaluation is the value returned by
    getAverageClassificaionRate.
    """
    examples, attrValues, labelName, labelValues = getCarDataset()
    print('Testing Car dataset. Number of examples %d.' % len(examples))
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    # The with-block closes the file even if serialising the tree fails.
    with open('car.out', 'w') as f:
        f.write(str(tree))
    print('Tree size: %d.\n' % tree.count())
    print('Entire tree written out to car.out in local directory\n')
    # The dataset loaded above is reused; the former second call to
    # getCarDataset() stored a result that was never read.
    # setFunc/infoFunc are forwarded so the averaged runs are built the same
    # way as the tree above; previously the defaults were always used here.
    evaluation = getAverageClassificaionRate(
        (examples, attrValues, labelName, labelValues),
        setFunc=setFunc, infoFunc=infoFunc)
    print('Results for training set:\n%s\n' % str(evaluation))
    printDemarcation()
    return (tree, evaluation)
Exemple #10
0
def testAdultSet(setFunc=setEntropy, infoFunc=infoGain):
    """Build a tree for the Adult dataset, time the build, and estimate accuracy.

    The expected average classification rate is about 0.95.

    The examples are loaded with getExtraCreditDataset().  The wall-clock
    time spent in makeTree is printed, and the tree built from the full
    dataset is written to 'adult.out' in the current directory.  The
    accuracy estimate comes from getAverageClassificaionRate with its
    default run count and test size, using the same setFunc and infoFunc
    as the tree returned here; the result is printed before returning.

    Returns (tree, evaluation), where evaluation is the value returned by
    getAverageClassificaionRate.
    """
    examples, attrValues, labelName, labelValues = getExtraCreditDataset()
    print('Testing Adult dataset. Number of examples %d.' % len(examples))
    start = time.time()
    tree = makeTree(examples, attrValues, labelName, setFunc, infoFunc)
    end = time.time()
    # Two spaces after the colon are intentional: they reproduce the output
    # of the former two-argument print statement exactly.
    print('Training time:  %s' % (end - start))
    # The with-block closes the file even if serialising the tree fails.
    with open('adult.out', 'w') as f:
        f.write(str(tree))
    print('Tree size: %d.\n' % tree.count())
    print('Entire tree written out to adult.out in local directory\n')
    # The dataset loaded above is reused; the former second call to
    # getExtraCreditDataset() stored a result that was never read.
    # setFunc/infoFunc are forwarded so the averaged runs are built the same
    # way as the tree above; previously the defaults were always used here.
    evaluation = getAverageClassificaionRate(
        (examples, attrValues, labelName, labelValues),
        setFunc=setFunc, infoFunc=infoFunc)
    print('Results for training set:\n%s\n' % str(evaluation))
    printDemarcation()
    return (tree, evaluation)
Exemple #11
0
def getAverageClassificaionRate(dataset,runs=20,testSize=200,setFunc = setEntropy, infoFunc = infoGain):
    """
    Estimate the classification rate by repeated random hold-out testing.

    For each run, testSize examples are drawn at random (without
    replacement) from a copy of the examples, a tree is built from the
    remaining examples with makeTree, and the first element of
    evaluateTree's result on the held-out examples is recorded as that
    run's score.

    dataset  -- tuple (examples, attrValues, labelName, labelValues)
    runs     -- number of random train/test splits to evaluate
    testSize -- number of examples held out for testing in each run
    setFunc, infoFunc -- passed through unchanged to makeTree

    Returns (scores, average): the list of per-run scores and their mean
    (0.0 when no run was performed).
    Raises ValueError if testSize exceeds the number of examples.
    """
    examples, attrValues, labelName, labelValues = dataset
    exampleCount = len(examples)
    # Fail early with a clear message instead of letting random.randint
    # raise an opaque "empty range" ValueError in the middle of a run.
    if testSize > exampleCount:
        raise ValueError(
            'testSize (%d) exceeds the number of examples (%d)'
            % (testSize, exampleCount))
    print('Starting test for average error for %d runs with test size %d' % (runs, testSize))
    scores = []
    for r in xrange(runs):
        # Work on a copy so the caller's example list is never modified.
        runExamples = examples[:]
        test = []
        for i in xrange(testSize):
            # randint is inclusive on both ends, hence the "- 1".
            pick = random.randint(0, len(runExamples) - 1)
            test.append(runExamples.pop(pick))
        tree = makeTree(runExamples, attrValues, labelName, setFunc, infoFunc)
        score = evaluateTree(tree, test, labelName)[0]
        print('Score for run %d is %f' % (r + 1, score))
        scores.append(score)
    # Guard against division by zero when no run was executed.
    if scores:
        average = sum(scores) / float(len(scores))
    else:
        average = 0.0
    print('Average classification rate over all runs: %f' % (average))
    return (scores, average)