コード例 #1
0
def main():
    """Train a decision tree, print its evaluation score, and plot the tree."""
    # Split the data set into training and test portions.
    train_x, test_x, train_y, test_y = process_data()

    # Build the decision tree from the training portion.
    growth_limits = {"max_depth": 4, "mini_sample": 10}
    fitted_tree = tree_generate(train_x, train_y, **growth_limits)

    # Run inference on the test portion and evaluate it.
    score = evaluate(test_x, test_y, fitted_tree)
    print('准确率 = ', score)

    # Draw the trained tree so the result is easy to inspect.
    treePlotter.create_plot(fitted_tree)
コード例 #2
0
def classify_lenses():
    """Build a decision tree from ``lenses.txt``, print it, and plot it.

    Every line of ``lenses.txt`` is read as one tab-separated sample
    (feature values followed by the class label).

    :return: None
    """
    # Only the read needs the file; the handle is closed before the tree is
    # built and plotted.
    with open("lenses.txt", "r") as f:
        # One line per sample (features, then class label).
        lenses = [inst.strip().split('\t') for inst in f]

    # Feature labels.
    # NOTE(review): "testRate" may be a typo for "tearRate"; left unchanged
    # because the label shows up in the printed/plotted tree -- confirm.
    lenses_labels = ["age", "prescript", "astigmatic", "testRate"]
    lenses_tree = create_tree(lenses, lenses_labels)

    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print(lenses_tree)

    import treePlotter
    treePlotter.create_plot(lenses_tree)
コード例 #3
0
 def get_result(self):
     """
     Build the decision tree, print and plot it, then print the result of
     classifying with it.

     :return: None
     """
     # A slice copy of the attribute list is handed to create_tree rather
     # than self.attributes itself.
     tree = self.create_tree(self.train_data, self.attributes[:])
     print("decisionTree:\n", tree)
     treePlotter.create_plot(tree)
     print("result:\n", self.classify_all(tree))
コード例 #4
0
    """
    使用pickle存储决策树
    :param input_tree: 决策树
    :param filename: 需要写入的文件名
    :return: None
    """
    import pickle
    # 以二进制的方式写入文件中
    fw = open(filename, 'wb')
    pickle.dump(input_tree, fw)
    fw.close()


def grab_tree(filename):
    """
    Load a decision tree structure that was stored with pickle.

    :param filename: path of the file to read
    :return: the object unpickled from the file
    :raises OSError: if the file cannot be opened
    """
    import pickle
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only call this on files from a trusted source.
    # The context manager closes the handle even when unpickling raises;
    # previously the file was opened and never closed.
    with open(filename, 'rb') as fr:
        return pickle.load(fr)


if __name__ == '__main__':
    # Demo: build a decision tree from the tab-separated samples in
    # lenses.txt, plot it, then print it.
    # The context manager closes the file as soon as the samples are read;
    # previously the handle was left open for the rest of the script.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lenses_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lenses_tree = create_tree(lenses, lenses_labels)
    treePlotter.create_plot(lenses_tree)
    print(lenses_tree)
コード例 #5
0
def test_glass():
    """Build a decision tree from ``lenses.txt`` and plot it.

    Every line of the file is read as one tab-separated sample.

    :return: None
    """
    # Close the file right after reading; previously the handle was never
    # closed.
    with open('lenses.txt') as fr:
        lenses = [line.strip().split('\t') for line in fr]
    # NOTE(review): 'precript' looks like a typo for 'prescript'; left
    # unchanged because the label appears in the plotted tree -- confirm.
    lense_labels = ['age', 'precript', 'astigmatic', 'tearRate']
    lense_tree = trees.create_tree(lenses, lense_labels)
    treePlotter.create_plot(lense_tree)
コード例 #6
0
ファイル: CART.py プロジェクト: Giyn/MachineLearningAlgorithm
    def entroy(self, y):
        """Return the base-2 entropy of the labels in *y*.

        :param y: one-dimensional label container that exposes ``shape``
            (for example a pandas Series or a NumPy array)
        :return: ``sum(-p * log2(p))`` taken over the relative frequency
            ``p`` of each distinct label
        """
        # The top-level pd.value_counts() is deprecated; wrapping in a Series
        # and calling Series.value_counts() is the supported equivalent.
        # Relative frequency of each class.
        p = pd.Series(y).value_counts() / y.shape[0]
        ent = np.sum(-p * np.log2(p))
        return ent


if __name__ == '__main__':
    # Load the comma-delimited data file, using its first column as the index.
    data_path2 = 'watermelon2_0_Ch.txt'
    data = pd.read_table(data_path2, encoding='utf8', delimiter=',', index_col=0)

    # The listed numbers are shifted down by one to give the zero-based
    # positions that iloc expects.
    train = [number - 1 for number in (1, 2, 3, 6, 7, 10, 14, 15, 16, 17)]
    test = [number - 1 for number in (4, 5, 8, 9, 11, 12, 13)]

    # Columns 0-5 are passed as the inputs and column 6 as the target,
    # first for the fitting rows and then for the validation rows.
    X, y = data.iloc[train, :6], data.iloc[train, 6]
    X_val, y_val = data.iloc[test, :6], data.iloc[test, 6]

    tree = DecisionTree('gini', 'pre_pruning')
    tree.fit(X, y, X_val, y_val)

    # Fraction of validation rows whose prediction equals the true label.
    accuracy = np.mean(tree.predict(X_val) == y_val)
    print("平均准确率为:", accuracy)
    treePlotter.create_plot(tree.tree_)