Python DecisionTree.createTree Examples

Programming Language: Python

Namespace/Package Name: tree

Class/Type: DecisionTree

Method/Function: createTree

Examples at hotexamples.com: 3

Python DecisionTree.createTree - 3 examples found. These are the top-rated real-world Python examples of tree.DecisionTree.createTree, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

DecisionTree(21)

fit(8)

predict(7)

createTree(3)

changeInputs(2)

score(2)

info(2)

run(2)

build_tree(2)

print_tree(1)

top_down_pessimistic_pruning(1)

show(1)

prune(1)

plot_tree(1)

printTree(1)

printInputs(1)

plant(1)

evaluate(1)

count_leaves(1)

classify(1)

train(1)

Example #1

Show file

File: plot.py Project: lazyjek/DecisionTreeDemo

def part2():
    """Plot accuracy against training-sample percentage for both datasets.

    For the diabetes and heart datasets in turn, a tree is trained with
    ``m = 4`` on every sample set yielded by ``selectSample`` at the rates
    5%, 10%, 20%, 50% and 100% (the original note says ten sets are drawn
    per rate -- that is decided inside ``selectSample``).  Each tree is
    scored on the test set, and the average, maximum and minimum accuracy
    per rate are drawn in the dataset's own subplot.  The figure is saved
    to ``../part2.pdf``.
    """
    datasets = [
        ('../diabetes_train.arff', '../diabetes_test.arff', 'diabetes'),
        ('../heart_train.arff', '../heart_test.arff', 'heart')
    ]
    subplot_index = {'diabetes': 1, 'heart': 2}
    min_leaf = 4  # the ``m`` argument handed to createTree

    plt.figure()
    for trainFileName, testFileName, key in datasets:
        attribute, trainset = data_provider(trainFileName)
        testAttribute, testset = data_provider(testFileName)

        # Parallel series: one entry per sampling rate.
        percents = []
        averages = []
        maxima = []
        minima = []
        for rate in (0.05, 0.1, 0.2, 0.5, 1):
            scores = []
            for sample in selectSample(trainset, rate):
                root = TreeNode(sample, attribute)
                tree = DecisionTree(root)
                tree.createTree(root, min_leaf)

                # Count correct predictions; the label is the last field.
                hits = 0
                total = 0
                for instance in testset:
                    total += 1
                    if tree.predict(root, instance) == instance[-1]:
                        hits += 1
                scores.append(float(hits) / total)

            percents.append(int(rate * 100))
            averages.append(float(sum(scores)) / len(scores))
            maxima.append(max(scores))
            minima.append(min(scores))

        ax = plt.subplot(1, 2, subplot_index[key])
        ax.set_xlim(0, 105)
        ax.set_ylim(0.45, 0.9)
        ax.set_ylabel('accuracy')
        ax.set_title(key)
        ax.plot(list(percents), averages, label='average')
        ax.plot(list(percents), maxima, label='maximum')
        ax.plot(list(percents), minima, label='minimum')
        ax.legend()

    plt.xlabel('dataset sample percentage')
    plt.savefig('../part2.pdf')

Example #2

Show file

File: plot.py Project: lazyjek/DecisionTreeDemo

def part3():
    """Plot test-set accuracy for several ``m`` values on both datasets.

    For the diabetes and heart datasets in turn, a tree is built from the
    full training set with the ``createTree`` parameter ``m`` set to 2, 5,
    10 and 20.  For each ``m`` the fraction of test instances whose
    prediction equals the instance's last field is recorded, then drawn as
    an annotated marker in the dataset's own subplot.  The figure is saved
    to ``../part3.pdf``.
    """
    subplot_index = {'diabetes': 1, 'heart': 2}

    plt.figure()
    for trainFileName, testFileName, key in [
        ('../diabetes_train.arff', '../diabetes_test.arff', 'diabetes'),
        ('../heart_train.arff', '../heart_test.arff', 'heart')
    ]:
        attribute, trainset = data_provider(trainFileName)
        # Only the test instances are needed; the test attributes are unused.
        _, testset = data_provider(testFileName)
        root = TreeNode(trainset, attribute)
        curTree = DecisionTree(root)

        # NOTE(review): the same root is passed to createTree for every m.
        # This presumes createTree rebuilds the tree from scratch each time
        # -- confirm against DecisionTree.createTree.
        points = []
        for m in (2, 5, 10, 20):
            curTree.createTree(root, m)
            trueSamples = 0
            falseSamples = 0
            for instance in testset:
                if curTree.predict(root, instance) == instance[-1]:
                    trueSamples += 1
                else:
                    falseSamples += 1
            points.append(
                [m, float(trueSamples) / (trueSamples + falseSamples)])

        # Axes setup does not depend on the individual point, so do it once
        # per dataset instead of once per marker.
        ax = plt.subplot(2, 1, subplot_index[key])
        ax.set_xlim(0, 22)
        ax.set_ylim(0.6, 0.8)
        ax.set_ylabel('accuracy')
        ax.set_title(key)
        for x, y in points:
            ax.annotate('%.3f' % y, xy=(x - 0.02, y + 0.02))
            ax.annotate('m=%d' % x, xy=(x - 0.02, y - 0.07))
            ax.plot(x, y, 'o-')

    plt.xlabel('tree number m')
    plt.savefig('../part3.pdf')

Example #3

Show file

        sys.exit()
    trainFileName = sys.argv[1]
    testFileName = sys.argv[2]
    try:
        m = int(sys.argv[3])
    except:
        print >> sys.stderr, "[ERROR] [m] should be in integer!"
        sys.exit()

    attribute, trainset = data_provider(trainFileName)
    testAttribute, testset = data_provider(testFileName)
    try:
        assert (testAttribute == attribute)
    except AssertionError:
        print >> sys.stderr, "[ERROR] pls check the attributes of test data."
        sys.exit()

    # train
    root = TreeNode(trainset, attribute)
    curTree = DecisionTree(root)
    curTree.createTree(root, m)
    curTree.printTree(root, 0)

    # test
    print '<Predictions for the Test Set Instances>'
    index = 1
    for instance in testset:
        print '{}: Actual: {} Predicted: {}'.format(
            index, instance[-1], curTree.predict(root, instance))
        index += 1