Exemple #1
0
def testClass():
    """Build the sample tree, round-trip it through disk and classify two vectors.

    Uses the module-level ``tree`` helper module. The tree is written to
    ``myTree.train`` in the current working directory, read back, and the
    reloaded copy is used to classify ``[1, 0]`` and ``[1, 1]``. One line is
    printed per test vector; nothing is returned.
    """
    myDat, labels = tree.createDataSet()
    # Hand createTree its own copy of the label list so the list used for
    # classification below is guaranteed to be the complete one.
    # NOTE(review): assumes createTree may modify the list it is given -
    # confirm against tree.createTree.
    myTree = tree.createTree(myDat, labels[:])

    # Persist the decision tree, then load it again to exercise the round trip.
    tree.storeTree(myTree, 'myTree.train')
    myTree2 = tree.grabTree('myTree.train')

    for testVec in ([1, 0], [1, 1]):
        result = tree.classify(myTree2, labels, testVec)
        # A single pre-formatted argument reproduces the spacing of the former
        # Python 2 print statement and is also valid on Python 3.
        print("Test  %s  result:  %s" % (testVec, result))
Exemple #2
0
def classify(datapoint, tree):
    """Walk ``tree`` with ``datapoint`` and return the label found at the leaf.

    At a ``Leaf`` the key of ``tree.labels`` with the largest value is
    returned. Otherwise the branch whose ``value`` equals
    ``datapoint[tree.feature]`` is followed recursively. When no branch
    matches, ``None`` is returned.
    """
    if isinstance(tree, Leaf):
        best_label, _ = max(tree.labels.items(), key=lambda pair: pair[1])
        return best_label

    wanted = datapoint[tree.feature]
    # First branch whose value matches the datapoint, or None if there is none.
    match = next((child for child in tree.branches if child.value == wanted), None)
    if match is None:
        return None
    return classify(datapoint, match)
Exemple #3
0
from tree import tree, classify
# One car described by six feature values, classified with the imported tree.
car = ['med', 'med', '4', 'more', 'big', 'high']
car_class = classify(car, tree)
print(car_class)

from collections import Counter
labels = ["unacc", "unacc", "acc", "acc", "good", "good"]
# Alternative label sets to experiment with:
#labels = ["unacc","unacc","unacc", "good", "vgood", "vgood"]
#labels = ["unacc", "unacc", "unacc", "unacc", "unacc", "unacc"]
label_counts = Counter(labels)
print(label_counts)

# Gini impurity: start from 1 and subtract the squared share of each label.
total = len(labels)
impurity = 1
for count in label_counts.values():
    share = count / total
    impurity -= share ** 2
print(impurity)

from collections import Counter
# Label list before any split: 6 "unacc", 4 "good", 3 "vgood".
unsplit_labels = ["unacc"] * 6 + ["good"] * 4 + ["vgood"] * 3

# First candidate split: three subsets.
split_labels_1 = [
    ["unacc"] * 6 + ["good"] * 2 + ["vgood"],
    ["good"] * 2,
    ["vgood"] * 2,
]

# Second candidate split: two subsets.
split_labels_2 = [
    ["unacc"] * 6 + ["good"] * 4,
    ["vgood"] * 3,
]

from tree import build_tree, print_tree, car_data, car_labels, classify
import random
random.seed(4)

# The features are the price of the car, the cost of maintenance, the number of doors, the number of people the car can hold, the size of the trunk, and the safety rating
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']

predictions = []
for i in range(20):
    # Draw a fresh sample of 1000 row indices (with replacement) for every
    # tree. Drawing the indices once, outside the loop, fed build_tree the
    # very same rows on all 20 iterations.
    # NOTE(review): assumes car_data / car_labels hold at least 1000 rows.
    indices = [random.randint(0, 999) for _ in range(1000)]
    data_subset = [car_data[index] for index in indices]
    labels_subset = [car_labels[index] for index in indices]
    subset_tree = build_tree(data_subset, labels_subset)
    predictions.append(classify(unlabeled_point, subset_tree))
print(predictions)
# Majority vote: the prediction that occurs most often wins.
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)
# data_subset / labels_subset still hold the last sample drawn in the loop.
# NOTE(review): find_best_split is not among the names imported by this
# snippet; it must be defined or imported before this line can run.
print(find_best_split(data_subset, labels_subset))

from tree import build_tree, print_tree, car_data, car_labels, classify
import random
random.seed(4)
# The features are the price of the car, the cost of maintenance, the number of doors, the number of people the car can hold, the size of the trunk, and the safety rating
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']


def _predict_from_resample(point):
    """Build a tree on 1000 randomly drawn rows and classify ``point`` with it."""
    picks = [random.randint(0, 999) for _ in range(1000)]
    sampled_rows = [car_data[pick] for pick in picks]
    sampled_labels = [car_labels[pick] for pick in picks]
    return classify(point, build_tree(sampled_rows, sampled_labels))


# Twenty independently resampled trees each cast one vote.
predictions = [_predict_from_resample(unlabeled_point) for _ in range(20)]
print(predictions)
# The most frequent vote is the final answer.
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)

from tree import training_data, training_labels, testing_data, testing_labels, make_random_forest, make_single_tree, classify
import numpy as np
import random
np.random.seed(1)
random.seed(1)
# Fit a single tree and a forest on the same training data.
# NOTE(review): the 40 is presumably the number of trees - confirm against
# make_random_forest.
tree = make_single_tree(training_data, training_labels)
forest = make_random_forest(40, training_data, training_labels)
# Counters for correct predictions; neither is updated in the visible part of
# this snippet.
forest_correct = 0
single_tree_correct = 0
for i in range(len(testing_data)):
    # NOTE(review): the prediction is computed but never compared with
    # testing_labels, and the forest is never queried - the snippet appears to
    # be cut off here.
    prediction = classify(testing_data[i], tree)
Exemple #6
0
import treePlot
import tree

if __name__ == '__main__':
    # Build the sample data set; only the feature labels are needed below.
    _, class_labels = tree.create_dataset()
    # my_tree = tree.create_tree(my_data, class_labels)

    # number_leafs = tree.get_number_leafs(my_tree)
    # print(number_leafs)
    #
    # tree_depth = tree.get_tree_depth(my_tree)
    # print(tree_depth)
    # treePlot.create_plot(my_tree)

    # Classify the vector [1, 0] with the tree returned by retrieve_tree(0).
    stored_tree = treePlot.retrieve_tree(0)
    print(tree.classify(stored_tree, class_labels, [1, 0]))
Exemple #7
0
# -*- coding:utf-8 -*-

import tree
import treePlotter

# Load the sample data set and its feature labels.
feature, labels = tree.create_data_set()
# en = tree.calcShannomEnt(feature)
# print en
# print feature
# print  labels

# feature[0][-1] = "maybe"
# en2 = tree.calcShannomEnt(feature)
# print feature
# print en2

# split = tree.splitDataSet(feature,0, 0)
# print tree.splitDataSet(feature,0, 0)
# print tree.splitDataSet(feature,0, 1)

# bestFeature = tree.chooseBestFeature(feature)
# print bestFeature

myTree = tree.create_tree(feature, labels)
# print(...) with one argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(myTree)

# treePlotter.createPlot()
# Reload the data set so classify receives a freshly created label list.
# NOTE(review): presumably needed because create_tree alters `labels` - confirm.
feature, labels = tree.create_data_set()
pre = tree.classify(myTree, labels, [1, 0])
print(pre)
Exemple #8
0
# -*- coding: utf-8 -*-
import tree
import copy
dataset, label = tree.createDataSet()
print(label)
# A plain `labels = label` is not enough here: both names would refer to the
# same list object, so the tree builder gets an independent deep copy instead.
labels = copy.deepcopy(label)
myTree = tree.createTree(dataset, labels)
# print(myTree)
print(label)
testResult = tree.classify(myTree, label, [1, 1])
print(testResult)
# Raw string: in an ordinary Python 3 literal "\N" starts a \N{...}
# named-character escape, so "F:\Nature..." is rejected as a SyntaxError.
# The raw form keeps the backslash exactly as written.
tree_path = r"F:\NatureRecognition/tree.txt"
tree.storeTree(myTree, tree_path)
tt = tree.grabTree(tree_path)
print(tt)
Exemple #9
0
# Demo calls against the `tree` and `treePlotter` modules, which must already
# be imported by the surrounding script. Every print uses the single-argument
# call form, which produces the same output on Python 2 and Python 3 (the bare
# print statement is a SyntaxError on Python 3).

# Entropy of a data set whose last column is 'yes' in every row.
print(tree.calcShannonEnt([[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'yes'],
                           [0, 1, 'yes'], [0, 1, 'yes']]))

print(tree.splitDataSet(
    [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']],
    0, 1))

print(tree.chooseBestFeatureToSplit([[1, 1, 'yes'], [1, 1, 'yes'],
                                     [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']]))

print(tree.createTree(
    [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']],
    ['No Surfacing?', 'Flippers?']))

# Hand-written tree as nested dicts: feature label -> feature value -> subtree
# or class label.
t = {'No Surfacing?': {0: 'no', 1: {'Flippers?': {0: 'no', 1: 'yes'}}}}
print(treePlotter.getNumLeafs(t))
print(treePlotter.getTreeDepth(t))

treePlotter.createPlot(t)

# Classify the vector [1, 0] with a literal tree of the same shape as `t`.
print(tree.classify(
    {'No Surfacing?': {
        0: 'no',
        1: {
            'Flippers?': {
                0: 'no',
                1: 'yes'
            }
        }
    }}, ['No Surfacing?', 'Flippers?'], [1, 0]))
Exemple #10
0
# labels = ['no surfacing', 'filppers']
# dataset[0][-1] = 'maybe'
# shannonEnt =  tree.calcShannonEnt(dataset)
# print shannonEnt

# print tree.splitDataSet(dataset, 0, 0)
# print tree.chooseBestFeature(dataset)
# print tree.createTree(dataset, labels)
# treeplotter.createPlot()
# myTree = treeplotter.retrieveTree(0)
# print myTree
# print treeplotter.getNumLeafs(myTree)
# print treeplotter.getTreeDepth(myTree)
# treeplotter.createPlot(myTree)
# print tree.classify(myTree, labels,[1,1])
# Read the tab-separated lenses data; the context manager closes the file
# even if reading fails.
with open('lenses.txt') as fr:
    lines = fr.readlines()

# strip() removes the line terminator so the last column (the class label)
# does not carry a trailing newline into the tree or the printed output.
lensesAll = [inst.strip().split("\t") for inst in lines]
# The first five rows are held back for the check below; the rest train the tree.
lensesTrain = lensesAll[5:]
lensesLables = ['age', 'prescript', 'astigmatic', 'tearRate']
# createTree gets a copy of the label list; the original is reused by classify.
lensesTree = tree.createTree(lensesTrain, lensesLables[:])
# treeplotter.createPlot(lensesTree)
# lensesTree =  tree.grabTree( 'Decision.txt')
# treeplotter.createPlot(lensesTree)
# Compare the predicted class with the recorded class for the held-back rows.
for inst in lensesAll[:5]:
    # Parenthesised single argument: valid on both Python 2 and Python 3.
    print("分类为%s, 正确为%s" % (tree.classify(lensesTree, lensesLables, inst[0:-1]), inst[-1]))



Exemple #11
0
import tree
import treeplotter

dataset, labels = tree.createDataSet()
print(dataset, labels, sep="\n")

# Keep an untouched copy of the labels for classification; the original list
# is the one handed to createTree.
label = labels.copy()
#classlist = [example[-1] for example in dataset]
mytree = tree.createTree(dataset, labels)
print(mytree)

#treeplotter.createPlot()
#print(treeplotter.getTreeDepth(mytree))
#createPlot(mytree)
#print(label)
predicted = tree.classify(mytree, label, [1, 0])
print(predicted)
treeplotter.createPlot(mytree)
Exemple #12
0
from tree import training_data, training_labels, testing_data, testing_labels, make_random_forest, make_single_tree, classify
import numpy as np
import random
np.random.seed(1)
random.seed(1)
from collections import Counter

# Fit both models on the same training data.
tree = make_single_tree(training_data, training_labels)
forest = make_random_forest(40, training_data, training_labels)

single_tree_correct = 0
forest_correct = 0

for i in range(len(testing_data)):
    sample = testing_data[i]
    expected = testing_labels[i]

    # Single tree: one prediction per test row.
    prediction = classify(sample, tree)
    if prediction == expected:
        single_tree_correct += 1

    # Forest: every member votes, the most frequent vote wins.
    predictions = [classify(sample, forest_tree) for forest_tree in forest]
    forest_prediction = max(predictions, key=predictions.count)
    if forest_prediction == expected:
        forest_correct += 1

# Fraction of test rows each model classified correctly.
test_count = len(testing_data)
print(single_tree_correct / test_count)
print(forest_correct / test_count)
Exemple #13
0
# tree.choose_best_feature_to_split(dataset)
#
# my_tree = tree.create_tree(dataset, labels)
#
# tree_plotter.retrieve_tree(1)

my_tree = tree_plotter.retrieve_tree(0)
#
# tree_plotter.get_num_leafs(my_tree)
#
# tree_plotter.get_tree_depth(my_tree)
tree_plotter.create_plot(my_tree)

data, labels = tree.create_dataset()

# The return values are not captured: these calls only matter when run
# interactively, where the result is echoed.
tree.classify(my_tree, labels, [1, 0])

tree.classify(my_tree, labels, [1, 1])

# One shared path so the store and load calls cannot drift apart.
classifier_path = (
    "/home/zhangzhiliang/Documents/my_git/DATA-SCIENTIST-/"
    "machine_learing_algorithm/machine_learning_in_action/3_decision_tree/classifierStorage.txt"
)

# Store the tree object itself; the quoted name 'my_tree' would have written
# that seven-character string to disk instead of the tree.
tree.store_tree(my_tree, classifier_path)

tree.load_tree(classifier_path)

# Contact lenses
fr = open(
Exemple #14
0
import tree as t
import treePlotter as tp
import os

# os.path.join keeps the path relative when dirname(__file__) is empty (script
# started from its own directory); plain concatenation produced "/lenses.txt",
# i.e. a file in the filesystem root.
lenses_path = os.path.join(os.path.dirname(__file__), 'lenses.txt')
# The context manager closes the file once all rows are read.
with open(lenses_path) as f:
    lenses = [r.strip().split('\t') for r in f.readlines()]
lensesLabel = ['age','prescript','astigmatic','tearRate']
# Build from a copy so the full label list stays available for the header
# line and for classify below.
# NOTE(review): assumes createTree may modify the list it receives - confirm.
lensesTree = t.createTree(lenses, lensesLabel[:])
tp.createPlot(lensesTree)
fmt = '%10s'
# Single-argument print(...) is valid on both Python 2 and Python 3.
print([fmt % x for x in lensesLabel])
for lense in lenses:
    # "%s %s" reproduces the space the Python 2 print statement put between
    # its two comma-separated items.
    print("%s %s" % ([fmt % x for x in lense], t.classify(lensesTree, lensesLabel, lense[0:-1])))
Exemple #15
0
# Source from Codecademy
from tree import build_tree, print_tree, car_data, car_labels, classify
import random

random.seed(4)

# The features are the price of the car, the cost of maintenance, the number of doors, the number of people the car can hold, the size of the trunk, and the safety rating
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']

predictions = []
while len(predictions) < 20:
    # 1000 row indices drawn at random (repeats allowed) for this tree.
    indices = [random.randint(0, 999) for _ in range(1000)]
    data_subset = []
    labels_subset = []
    for index in indices:
        data_subset.append(car_data[index])
        labels_subset.append(car_labels[index])
    subset_tree = build_tree(data_subset, labels_subset)
    result = classify(unlabeled_point, subset_tree)
    predictions.append(result)

print(predictions)
# Majority vote across all trees.
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)
import treePlotter
import tree

myDat, labels = tree.createDataSet()
# Independent list holding the same label entries.
labelsTemp = list(labels)
print('00000000000000000000labels = ', labelsTemp)
#myTree = tree.createTree(myDat, labelsTemp)
#tree.storeTree(myTree,'Tree.txt')

# Load the previously stored tree instead of rebuilding it.
myTreeFromFile = tree.grabTree('Tree.txt')
for caption, value in (('myTreeFromFile = ', myTreeFromFile), ('labels = ', labels)):
    print(caption, value)

result = tree.classify(myTreeFromFile, labels, [1, 0])
print('result = ', result)
Exemple #17
0
# When considering buying a car, what factors go into making that decision?
# Each car can fall into four different classes which represent how satisfied someone would be with purchasing the car — unacc (unacceptable), acc (acceptable), good, vgood.
# Each car has 6 features:
#     - The price of the car which can be "vhigh", "high", "med", or "low".
#     - The cost of maintaining the car which can be "vhigh", "high", "med", or "low".
#     - The number of doors which can be "2", "3", "4", "5more".
#     - The number of people the car can hold which can be "2", "4", or "more".
#     - The size of the trunk which can be "small", "med", or "big".
#     - The safety rating of the car which can be "low", "med", or "high".

from tree import tree, classify, data

# One car described by its six feature values, classified with the imported tree.
car = ["low", "low", "4", "4", "big", "high"]
car_class = classify(car, tree)
print(car_class)
Exemple #18
0
import arff
import tree
import sys

# Command line: <train.arff> <test.arff> <m>
arg = sys.argv
m = int(arg[3])
# Context managers close both files as soon as they have been parsed.
# NOTE(review): assumes arff.load reads the whole file before returning -
# confirm for the arff package in use.
with open(arg[1], 'r') as train_file:
    trainData = arff.load(train_file)
with open(arg[2], 'r') as test_file:
    testData = arff.load(test_file)

myTree = tree.createTree(trainData['data'], trainData['attributes'], m)
tree.plotTree(myTree, trainData['attributes'])

prediction = [tree.classify(myTree, testData['attributes'], obs) for obs in testData['data']]
# The last column of every test row is taken as the actual class.
true = [obs[-1] for obs in testData['data']]
print("<Predictions for the Test Set Instances>")
n = 0  # number of correctly classified test instances
for index, (predicted, actual) in enumerate(zip(prediction, true), 1):
    if predicted == actual:
        n += 1
    # Label each line with the 1-based instance number; the running count of
    # correct predictions (n) was printed here by mistake.
    print("{}: Actual: {} Predicted: {}".format(index, actual, predicted))
print("Number of correctly classified: {} Total number of test instances: {}".format(n, len(testData['data'])))