Example #1
File: ID3.py Project: MemIMlt/test
def main():
    dataset, label = CreatDataSet()
    label_temp = label[:]
    decision_tree = CreatTree(dataset, label_temp)
    testvec = [2, 2, 1, 0, 0, 1]
    print(classify(decision_tree, label, testvec))
    tree_plotter.create_plot(decision_tree)
def test():
    fr = open("lenses.txt")
    lense_data = [example.strip().split('\t') for example in fr.readlines()]
    ftr_list = ['age', 'prescript', 'astigmatic', 'tearRate']

    lense_tree = trees.build_tree(lense_data, ftr_list)
    print(lense_tree)

    tree_plotter.create_plot(lense_tree)
def main():
    # Track starting time of program
    t0 = t()

    # Initialize class instance of the decision tree algorithm
    dt = ID3_Decision_Tree_Algorithm()

    # Playing with the Lenses dataset
    f = open("lenses.txt")
    lenses = [line.strip().split("\t") for line in f.readlines()]
    lenses_labels = ["age", "prescript", "astigmatic", "tear_rate"]
    lenses_tree = dt.create_tree(lenses, lenses_labels)
    print(
        "\nDECISION TREE FOR THE LENSES DATASET IS: {}\n".format(lenses_tree))
    dt_plt.create_plot(t0, lenses_tree)

    # Run testing methods on decision tree algorithm
    """
    dataset, labels = dt.create_dataset()
    tree = dt_plt.retrieve_tree(0)
    dt.store_tree(tree, "classifier_storage.txt")
    grabbed_tree = dt.grab_tree("classifier_storage.txt")
    print("GRABBED DECISION TREE IS: {}\n".format(grabbed_tree))
    """

    # Classify new test vector against decision tree
    """
    dataset, labels = dt.create_dataset()
    tree = dt_plt.retrieve_tree(0)
    class_label = dt.classify(tree, labels, [1, 1]).upper()
    print("CLASS LABEL RESULT IS: {}\n".format(class_label))
    """

    # Create decision tree from dataset and labels
    """
    dataset, labels = dt.create_dataset()
    decision_tree = dt.create_tree(dataset, labels)
    print("COMPLETE DECISION TREE: {}\n".format(decision_tree))
    """

    # Track ending time of program and determine overall program runtime
    t1 = t()
    delta = (t1 - t0) * 1000

    print("Real program runtime is {0:.4g} milliseconds.\n".format(delta))
    return
Example #4
def main():
    # dataset,labels = create_dataset()
    # print(dataset[0])
    # print(calc_shannon_entropy(dat))
    # a = split_dataset(dataset, 0, 1)
    # b = split_dataset(dataset, 0, 0)
    # c = choose_best_feature_to_split(dataset)
    # t = create_tree(dataset,labels)
    # t = tree_plotter.retrieve_tree(0)
    #
    # print(classify(t,labels,[0,0]))
    # print(classify(t,labels,[0,1]))
    # tree_plotter.create_plot(t)
    # print(t)
    # store_tree(t,'tree.txt')
    # a = grab_tree('tree.txt')
    # print(a)
    lenses, lenses_labels = load_glasses()
    t2 = create_tree(lenses, lenses_labels)
    print(t2)
    tree_plotter.create_plot(t2)
Example #5
#!/usr/bin/env python

import trees as trees
import tree_plotter as tree_plotter


def parse_data(filename):
    with open(filename) as f:
        lenses = [x.strip().split('\t') for x in f.readlines()]
    lense_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
    return lenses, lense_labels


if __name__ == '__main__':
    lenses, lense_labels = parse_data('../../data/ch3/lenses.txt')
    lenses_tree = trees.create_tree(lenses, lense_labels)
    tree_plotter.create_plot(lenses_tree)
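
Most of these examples parse the same tab-separated lenses.txt file. A minimal sketch of what the parsed rows and labels are assumed to look like (the sample values below are illustrative, following the usual four-feature layout):

# Illustrative only: hypothetical lenses.txt lines, tab-separated,
# with four feature values followed by the class label.
sample_lines = [
    "young\tmyope\tno\treduced\tno lenses",
    "young\tmyope\tno\tnormal\tsoft",
    "presbyopic\thyper\tyes\tnormal\tno lenses",
]
parsed = [line.strip().split("\t") for line in sample_lines]
print(parsed[0])  # ['young', 'myope', 'no', 'reduced', 'no lenses']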
Example #6
import trees
import tree_plotter

my_data, _ = trees.create_dataset()
print(trees.cal_shannon_ent(my_data))

my_data[0][-1] = 'maybe'
print(trees.cal_shannon_ent(my_data))

my_data, _ = trees.create_dataset()
trees.split_dataset(my_data, 0, 1)
trees.split_dataset(my_data, 0, 0)

trees.choose_best_feature_to_split(my_data)

my_data, labels = trees.create_dataset()
trees.create_tree(my_data, labels)

tree_plotter.create_plot()
import pickle

import pandas as pd

import tree_plotter as tp  # assumed alias, used by the plotting call at the end


def get_tree(filename):
    # Load a previously pickled decision tree; pickle requires binary mode.
    with open(filename, 'rb') as f:
        dec_tree = pickle.load(f)
    return dec_tree
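

# A matching writer is not shown in this excerpt; a minimal sketch of a
# pickle-based store function (the name is illustrative), using binary
# mode to mirror get_tree above:
def store_tree(dec_tree, filename):
    with open(filename, 'wb') as f:
        pickle.dump(dec_tree, f)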


def classify(dec_tree, data):
    # Walk the nested-dict tree, matching each split value against the record.
    label = None
    key = list(dec_tree.keys())[0]  # the feature this node splits on
    subdict = dec_tree[key]
    for value in subdict.keys():
        if data[key] == value:
            if isinstance(subdict[value], dict):
                label = classify(subdict[value], data)
            else:
                label = subdict[value]
    return label
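

# Unlike the list-based classify in the other examples, this version indexes
# the record by feature name, so it expects a mapping such as a dict or a
# pandas Series. A minimal usage sketch with a hand-built tree and record
# (values are illustrative only):
toy_tree = {'tearRate': {'reduced': 'no lenses',
                         'normal': {'astigmatic': {'no': 'soft', 'yes': 'hard'}}}}
toy_record = {'age': 'young', 'prescript': 'myope',
              'astigmatic': 'no', 'tearRate': 'normal'}
print(classify(toy_tree, toy_record))  # -> 'soft'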


def read_data():
    dataset = pd.read_table(
        '../data/lenses/lenses.txt',
        header=None,
        names=['age', 'prescript', 'astigmatic', 'tearRate', 'type'])
    return dataset


ds = read_data()
tree = create_tree(ds)
tp.create_plot(tree)
Example #8
#!/usr/bin/env python
# -*- coding:utf-8 -*-

import tree_plotter
import trees

fr = open('lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.create_tree(lenses, lensesLabels)

print(lenses)
print(lensesTree)
tree_plotter.create_plot(lensesTree)  # decision tree produced by the ID3 algorithm
Example #9

def classify(input_tree, feat_labels, test_vec):
    """
    决策树分类
    :param input_tree: 已经生成的决策树
    :param feat_labels: 存储选择的最优特征标签
    :param test_vec: 测试的数据,顺序对应最优特征标签
    :return: 分类结果
    """
    first_str = list(input_tree.keys())[0]  # 获取树的第一特征属性
    second_dict = input_tree[first_str]  # 树的分子,子集合Dict(下一个字典)
    feat_index = feat_labels.index(first_str)  # 获取决策树第一层在feat_labels中的位置
    for key in second_dict.keys():
        if test_vec[feat_index] == key:
            if type(second_dict[key]).__name__ == 'dict':
                class_label = classify(second_dict[key], feat_labels, test_vec)
            else:
                class_label = second_dict[key]
            return class_label


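# create_data_set is not shown in this excerpt; a minimal sketch of the toy
# "fish" dataset the demo below appears to assume (two binary features:
# can survive without surfacing, has flippers); the rows are illustrative:
def create_data_set():
    data_set = [[1, 1, 'yes'],
                [1, 1, 'yes'],
                [1, 0, 'no'],
                [0, 1, 'no'],
                [0, 1, 'no']]
    labels = ['no surfacing', 'flippers']
    return data_set, labels

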
data_set, labels = create_data_set()
decision_tree = create_tree(data_set, labels)
print("决策树:", decision_tree)
data_set, labels = create_data_set()
print("(1)不浮出水面可以生存,无脚蹼:", classify(decision_tree, labels, [1, 0]))
print("(2)不浮出水面可以生存,有脚蹼:", classify(decision_tree, labels, [1, 1]))
print("(3)不浮出水面可以不能生存,无脚蹼:", classify(decision_tree, labels, [0, 0]))
tree_plotter.create_plot(decision_tree)
Example #10
import operator

import tree_plotter


def majority_cnt(class_list):
    # Count class votes and return the most common class label.
    class_count = {}
    for vote in class_list:
        class_count[vote] = class_count.get(vote, 0) + 1
    sorted_class_count = sorted(class_count.items(),
                                key=operator.itemgetter(1),
                                reverse=True)
    return sorted_class_count[0][0]


def create_tree(data_set, labels):
    class_list = [example[-1] for example in data_set]
    if class_list.count(class_list[0]) == len(class_list):
        return class_list[0]
    if len(data_set[0]) == 1:
        return majority_cnt(class_list)
    best_feat = choose_best_feature_to_split(data_set)
    best_feat_label = labels[best_feat]
    my_tree = {best_feat_label: {}}
    del (labels[best_feat])
    feat_values = [example[best_feat] for example in data_set]
    unique_vls = set(feat_values)
    for value in unique_vls:
        sub_labels = labels[:]
        my_tree[best_feat_label][value] = create_tree(
            split_dataset(data_set, best_feat, value), sub_labels)
    return my_tree
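

# choose_best_feature_to_split and split_dataset are not shown in this
# excerpt; a minimal sketch of standard ID3 versions, assuming the class
# label is the last element of every row:
from math import log


def calc_shannon_ent(data_set):
    # Shannon entropy of the class-label column.
    label_counts = {}
    for row in data_set:
        label_counts[row[-1]] = label_counts.get(row[-1], 0) + 1
    return -sum((n / len(data_set)) * log(n / len(data_set), 2)
                for n in label_counts.values())


def split_dataset(data_set, axis, value):
    # Rows whose feature `axis` equals `value`, with that feature removed.
    return [row[:axis] + row[axis + 1:] for row in data_set if row[axis] == value]


def choose_best_feature_to_split(data_set):
    # Index of the feature with the highest information gain.
    base_ent = calc_shannon_ent(data_set)
    best_gain, best_feat = 0.0, -1
    for i in range(len(data_set[0]) - 1):
        new_ent = 0.0
        for value in set(row[i] for row in data_set):
            sub = split_dataset(data_set, i, value)
            new_ent += len(sub) / len(data_set) * calc_shannon_ent(sub)
        if base_ent - new_ent > best_gain:
            best_gain, best_feat = base_ent - new_ent, i
    return best_feat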


if __name__ == '__main__':
    fr = open('lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lenses_labels = ['age', 'prescript', 'astigmatic', 'tear_rate']
    my_tree = create_tree(lenses, lenses_labels)
    tree_plotter.create_plot(my_tree)
Example #11
]
enricher = DataSetFeaturesEnricher(original_data_set, feature_creation_labels)
data_set = enricher.get_enrich_data_set()

# Create a set of short labels. Having long labels made the rendered tree unreadable.
short_labels = [
    'fn_longer_ls', 'middle', 'f&l', 'fn_before_ln', 'vowel', 'ln_even'
]

# Create the decision tree and render it.
tree = DecisionTree(data_set, short_labels).make_tree()
# create_plot(tree)

# Prune the training set.
pruned_tree = TreePruner(tree).prune()
create_plot(pruned_tree)
print('Tree depth: ', get_tree_depth(tree))

# Classify other results
c = Classifier(pruned_tree, short_labels)

print('\nClassify the training set: ')
dsc = DataSetClassifier(c, enricher)
results = dsc.classify_data_set(original_data_set)

print('Invalid classified entries:', dsc.invalid_entries, '\nTotal entries:',
      len(results), '\nError:',
      str(round(dsc.error_rate, 2)) + '%')

print('\nClassify the test set: ')
testing_data_set = DataSetLoader('dataset/test.data').load()
import tree_plotter

tree = tree_plotter.retrieve_tree(0)
tree_plotter.create_plot(tree)
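
Here retrieve_tree(0) fetches a pre-built sample tree so create_plot can be exercised without building one from data. Throughout these examples a tree is a nested dict mapping a feature name to {feature value: subtree or class label}; a sketch of that shape with illustrative values:

sample_tree = {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}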
import json


def grab_tree(filename):
    """
    Read the tree back in using the json module.
    :param filename:
    :return:
    """
    with open(filename, 'r') as fr:
        return json.load(fr)
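

# A matching json-based writer is not shown in this excerpt; a minimal
# sketch (the name store_tree_json is illustrative) that pairs with the
# reader above:
def store_tree_json(input_tree, filename):
    with open(filename, 'w') as fw:
        json.dump(input_tree, fw)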


if __name__ == '__main__':
    # my_dataset, labels = create_dataset()
    # my_tree = retrieve_tree(0)
    # print(my_dataset)
    # print(labels)
    # print(my_tree)
    # print(classify(my_tree, labels, [1, 0]))
    # print(classify(my_tree, labels, [1, 1]))
    # store_tree(my_tree, 'classerfier_storage.txt')
    # print(grab_tree('classerfier_storage.txt'))
    # print(calc_shannon_ent(my_dataset))
    # print(spilt_dataset(my_dataset, 0, 1))
    # print(spilt_dataset(my_dataset, 0, 0))
    # print(create_tree(my_dataset, labels))
    lenses = []
    with open('lenses.txt', 'r') as fr:
        for line in fr:
            line = line.strip().split('\t')
            lenses.append(line)
    lenses_label = ['age', 'prescript', 'astigmatic', 'tear rate']
    lenses_tree = create_tree(lenses, lenses_label)
    create_plot(lenses_tree)