def main():
    """Build a decision tree, classify one sample vector, and plot the tree."""
    dataset, label = CreatDataSet()
    # CreatTree consumes/mutates its label list, so hand it a copy
    labels_copy = label[:]
    decision_tree = CreatTree(dataset, labels_copy)
    sample = [2, 2, 1, 0, 0, 1]
    print(classify(decision_tree, label, sample))
    tree_plotter.create_plot(decision_tree)
def test():
    """Build a decision tree for the lenses dataset, print it, and plot it.

    Fix: the original opened ``lenses.txt`` without ever closing it; a
    ``with`` block now releases the file handle deterministically.
    """
    with open("lenses.txt") as fr:
        lense_data = [example.strip().split('\t') for example in fr.readlines()]
    ftr_list = ['age', 'prescript', 'astigmatic', 'tearRate']
    lense_tree = trees.build_tree(lense_data, ftr_list)
    print(lense_tree)
    tree_plotter.create_plot(lense_tree)
def main():
    """Run the ID3 decision-tree demo on the lenses dataset and report runtime."""
    # Track starting time of program
    t0 = t()

    # Initialize class instance of the decision tree algorithm
    dt = ID3_Decision_Tree_Algorithm()

    # Playing with the Lenses dataset.
    # Fix: the original leaked the file handle; close it via a context manager.
    with open("lenses.txt") as f:
        lenses = [line.strip().split("\t") for line in f.readlines()]
    lenses_labels = ["age", "prescript", "astigmatic", "tear_rate"]
    lenses_tree = dt.create_tree(lenses, lenses_labels)
    print("\nDECISION TREE FOR THE LENSES DATASET IS: {}\n".format(lenses_tree))
    dt_plt.create_plot(t0, lenses_tree)

    # Run testing methods on decision tree algorithm
    """
    dataset, labels = dt.create_dataset()
    tree = dt_plt.retrieve_tree(0)
    dt.store_tree(tree, "classifier_storage.txt")
    grabbed_tree = dt.grab_tree("classifier_storage.txt")
    print("GRABBED DECISION TREE IS: {}\n".format(grabbed_tree))
    """

    # Classify new test vector against decision tree
    """
    dataset, labels = dt.create_dataset()
    tree = dt_plt.retrieve_tree(0)
    class_label = dt.classify(tree, labels, [1, 1]).upper()
    print("CLASS LABEL RESULT IS: {}\n".format(class_label))
    """

    # Create decision tree from dataset and labels
    """
    dataset, labels = dt.create_dataset()
    decision_tree = dt.create_tree(dataset, labels)
    print("COMPLETE DECISION TREE: {}\n".format(decision_tree))
    """

    # Track ending time of program and determine overall program runtime
    t1 = t()
    delta = (t1 - t0) * 1000
    print("Real program runtime is {0:.4g} milliseconds.\n".format(delta))
    return
def main():
    """Build a decision tree from the glasses dataset, print it, and plot it."""
    # Earlier experiments, kept disabled for reference:
    # dataset,labels = create_dataset()
    # print(dataset[0])
    # print(calc_shannon_entropy(dat))
    # a = split_dataset(dataset, 0, 1)
    # b = split_dataset(dataset, 0, 0)
    # c = choose_best_feature_to_split(dataset)
    # t = create_tree(dataset,labels)
    # t = tree_plotter.retrieve_tree(0)
    # # print(classify(t,labels,[0,0]))
    # print(classify(t,labels,[0,1]))
    # tree_plotter.create_plot(t)
    # print(t)
    # store_tree(t,'tree.txt')
    # a = grab_tree('tree.txt')
    # print(a)
    glasses_rows, glasses_labels = load_glasses()
    glasses_tree = create_tree(glasses_rows, glasses_labels)
    print(glasses_tree)
    tree_plotter.create_plot(glasses_tree)
#!/usr/bin/env python
import trees as trees
import tree_plotter as tree_plotter


def parse_data(filename):
    """Read a tab-separated lenses file.

    Returns a pair ``(rows, labels)``: the parsed records and the fixed
    feature-label list used to build the tree.
    """
    with open(filename) as handle:
        rows = [record.strip().split('\t') for record in handle.readlines()]
    labels = ['age', 'prescript', 'astigmatic', 'tearRate']
    return rows, labels


if __name__ == '__main__':
    lenses, lense_labels = parse_data('../../data/ch3/lenses.txt')
    lenses_tree = trees.create_tree(lenses, lense_labels)
    tree_plotter.create_plot(lenses_tree)
import trees
import tree_plotter

# Entropy of the toy dataset, before and after corrupting one class label.
records, _ = trees.create_dataset()
print(trees.cal_shannon_ent(records))
records[0][-1] = 'maybe'
print(trees.cal_shannon_ent(records))

# Rebuild a clean copy and exercise the splitting helpers (results discarded).
records, _ = trees.create_dataset()
trees.split_dataset(records, 0, 1)
trees.split_dataset(records, 0, 0)
trees.choose_best_feature_to_split(records)

# Build the full tree, then draw the plotter's demo figure.
records, feature_labels = trees.create_dataset()
trees.create_tree(records, feature_labels)
tree_plotter.create_plot()
def get_tree(filename):
    """Deserialize a pickled decision tree from *filename*.

    Fix: pickle data is bytes — the file must be opened in binary mode
    ('rb'); text-mode reading fails on Python 3.
    """
    with open(filename, 'rb') as f:
        dec_tree = pickle.load(f)
    return dec_tree


def classify(dec_tree, data):
    """Walk the decision tree and return the class label for *data*.

    :param dec_tree: nested dict of the form {feature: {value: subtree-or-label}}
    :param data: mapping from feature name to the observed value
    :return: the leaf label, or None when no branch matches
    """
    label = None
    # Fix: dict.keys() is not subscriptable on Python 3; take the first
    # (and only) root key via the iterator protocol instead.
    key = next(iter(dec_tree))
    subdict = dec_tree[key]
    for value in subdict.keys():
        if data[key] == value:
            if isinstance(subdict[value], dict):
                # Internal node: recurse into the matching subtree.
                label = classify(subdict[value], data)
            else:
                label = subdict[value]
    return label


def read_data():
    """Load the lenses dataset into a DataFrame with named columns."""
    dataset = pd.read_table(
        '../data/lenses/lenses.txt',
        header=None,
        names=['age', 'prescript', 'astigmatic', 'tearRate', 'type'])
    return dataset


ds = read_data()
tree = create_tree(ds)
tp.create_plot(tree)
#!usr/bin/env python
# -*- coding:utf-8 -*-
import tree_plotter
import trees

# Fix: the original opened the file without closing it; use a context
# manager so the handle is released deterministically.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.create_tree(lenses, lensesLabels)
print(lenses)
print(lensesTree)
tree_plotter.create_plot(lensesTree)
# Decision tree produced by the ID3 algorithm
def classify(input_tree, feat_labels, test_vec):
    """Classify a test vector against a generated decision tree.

    :param input_tree: the generated decision tree (nested dicts)
    :param feat_labels: ordered list of the feature labels the tree was built on
    :param test_vec: sample feature values, ordered to match feat_labels
    :return: the predicted class label, or None if the sample's value has
             no matching branch in the tree
    """
    # Fix: the original left class_label unassigned when no branch matched,
    # raising UnboundLocalError; default to None instead.
    class_label = None
    first_str = list(input_tree.keys())[0]  # root feature of this (sub)tree
    second_dict = input_tree[first_str]  # branches keyed by feature value
    feat_index = feat_labels.index(first_str)  # root feature's index in test_vec
    for key in second_dict.keys():
        if test_vec[feat_index] == key:
            if type(second_dict[key]).__name__ == 'dict':
                # Internal node: recurse into the matching subtree.
                class_label = classify(second_dict[key], feat_labels, test_vec)
            else:
                class_label = second_dict[key]
    return class_label


data_set, labels = create_data_set()
decision_tree = create_tree(data_set, labels)
print("决策树:", decision_tree)
data_set, labels = create_data_set()
print("(1)不浮出水面可以生存,无脚蹼:", classify(decision_tree, labels, [1, 0]))
print("(2)不浮出水面可以生存,有脚蹼:", classify(decision_tree, labels, [1, 1]))
print("(3)不浮出水面可以不能生存,无脚蹼:", classify(decision_tree, labels, [0, 0]))
tree_plotter.create_plot(decision_tree)
        # (truncated) tail of majority_cnt: the sort call opened above this
        # chunk orders class counts descending by vote count.
        key=operator.itemgetter(1), reverse=True)
    return sorted_class_count


def create_tree(data_set, labels):
    # Recursively build an ID3 decision tree as nested dicts:
    # {best_feature_label: {feature_value: subtree_or_class_label}}.
    # NOTE(review): mutates *labels* in place via del — callers must pass a copy
    # if they need the list afterwards.
    class_list = [example[-1] for example in data_set]
    # All samples share one class: branch is pure, return that label as a leaf.
    if class_list.count(class_list[0]) == len(class_list):
        return class_list[0]
    # Only the class column remains (no features left): majority vote.
    if len(data_set[0]) == 1:
        return majority_cnt(class_list)
    best_feat = choose_best_feature_to_split(data_set)
    best_feat_label = labels[best_feat]
    my_tree = {best_feat_label: {}}
    del (labels[best_feat])
    feat_values = [example[best_feat] for example in data_set]
    unique_vls = set(feat_values)
    for value in unique_vls:
        # Copy labels so the recursion's del does not affect sibling branches.
        sub_labels = labels[:]
        my_tree[best_feat_label][value] = create_tree(
            split_dataset(data_set, best_feat, value), sub_labels)
    return my_tree


if __name__ == '__main__':
    # NOTE(review): file handle is never closed — consider a with-statement.
    fr = open('lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lenses_labels = ['age', 'prescript', 'astigmatic', 'tear_rate']
    my_tree = create_tree(lenses, lenses_labels)
    tree_plotter.create_plot(my_tree)
# (truncated) the closing bracket below ends a list literal that starts
# above this chunk — presumably the feature_creation_labels list; confirm.
]

# Enrich the raw dataset with the derived features named above.
enricher = DataSetFeaturesEnricher(original_data_set, feature_creation_labels)
data_set = enricher.get_enrich_data_set()

# Create a set of short labels. Having long labels made the rendered tree unreadable.
short_labels = [
    'fn_longer_ls', 'middle', 'f&l', 'fn_before_ln', 'vowel', 'ln_even'
]

# Create the decision tree and render it.
tree = DecisionTree(data_set, short_labels).make_tree()
# create_plot(tree)

# Prune the training set.
pruned_tree = TreePruner(tree).prune()
create_plot(pruned_tree)
# NOTE(review): depth is reported for the unpruned tree, not pruned_tree —
# confirm this is intentional.
print('Tree depth: ', get_tree_depth(tree))

# Classify other results
c = Classifier(pruned_tree, short_labels)
print('\nClassify the training set: ')
dsc = DataSetClassifier(c, enricher)
results = dsc.classify_data_set(original_data_set)
print('Invalid classified entries:', dsc.invalid_entries,
      '\nTotal entries:', len(results),
      '\nError:', str(round(dsc.error_rate, 2)) + '%')

print('\nClassify the test set: ')
# (truncated) the test-set evaluation continues past this chunk.
testing_data_set = DataSetLoader('dataset/test.data').load()
import tree_plotter

# Fetch the canned demo tree at index 0 and render it.
demo_tree = tree_plotter.retrieve_tree(0)
tree_plotter.create_plot(demo_tree)
    Read a stored tree back using the json module.
    (truncated: this function's def line and docstring opening are above this chunk)

    :param filename: path of the JSON file the tree was stored in
    :return: the deserialized tree (nested dicts)
    """
    with open(filename, 'r') as fr:
        return json.load(fr)


if __name__ == '__main__':
    # Earlier experiments, kept disabled for reference:
    # my_dataset, labels = create_dataset()
    # my_tree = retrieve_tree(0)
    # print(my_dataset)
    # print(labels)
    # print(my_tree)
    # print(classify(my_tree, labels, [1, 0]))
    # print(classify(my_tree, labels, [1, 1]))
    # store_tree(my_tree, 'classerfier_storage.txt')
    # print(grab_tree('classerfier_storage.txt'))
    # print(calc_shannon_ent(my_dataset))
    # print(spilt_dataset(my_dataset, 0, 1))
    # print(spilt_dataset(my_dataset, 0, 0))
    # print(create_tree(my_dataset, labels))

    # Build the lenses decision tree from the tab-separated data file and plot it.
    lenses = []
    with open('lenses.txt', 'r') as fr:
        for line in fr:
            line = line.strip().split('\t')
            lenses.append(line)
    lenses_label = ['age', 'prescript', 'astigmatic', 'tear rate']
    lenses_tree = create_tree(lenses, lenses_label)
    create_plot(lenses_tree)