Beispiel #1
0
def ada_classify(tran_data, test_data):
    '''
    使用训练的adaboost决策树对测试数据进行预测
    '''
    res_cls = []
    sub_tree_wh = []
    wh_classifier = adaboost(tran_data, 300)
    final_cls = []
    '''
    [   [权重,决策树],
        ...
    ]
    '''

    for wh_tree in wh_classifier:
        sub_tree_wh.append(wh_tree[0])
        res_cls.append(wh_tree[1].classify(test_data))

    clses_T = map(list, zip(*res_cls))
    print "样本长度", len(clses_T), "分类器个数", len(res_cls)

    for c in clses_T:
        vote_res = {}
        for i, wh in zip(c, sub_tree_wh):
            if i in vote_res:
                vote_res[i] += wh
            else:
                vote_res[i] = wh
        final_cls.append(max(vote_res, key=vote_res.get))

    print "分类结果:", final_cls
    accurcy = check_accurcy(test_data, final_cls)
    return accurcy
Beispiel #2
0
def ada_classify(tran_data, test_data):
    '''
    Predict labels for test_data with an adaboost-trained ensemble of
    decision trees.

    Trains 300 weighted weak classifiers on tran_data, lets each tree
    classify every test sample, then picks, per sample, the label with the
    largest sum of classifier weights.  Returns the resulting accuracy.
    '''
    res_cls = []        # one prediction list per classifier
    sub_tree_wh = []    # weight of each classifier, same order as res_cls
    wh_classifier = adaboost(tran_data,300)
    final_cls = []      # weighted-majority label for each test sample

    '''
    [   [权重,决策树],
        ...
    ]
    '''

    # wh_classifier layout (see string above): [[weight, tree], ...].
    for wh_tree in wh_classifier:
        sub_tree_wh.append(wh_tree[0])
        res_cls.append(wh_tree[1].classify(test_data))

    # Transpose so each row holds all classifiers' votes for one sample.
    clses_T = map(list, zip(*res_cls))
    print "样本长度", len(clses_T), "分类器个数", len(res_cls)

    for c in clses_T:
        vote_res = {}   # label -> accumulated classifier weight
        for i, wh in zip(c, sub_tree_wh):
            if i in vote_res:
                vote_res[i] += wh
            else:
                vote_res[i] = wh
        # Label with the highest total weight wins the vote.
        final_cls.append(max(vote_res, key=vote_res.get))

    print "分类结果:",  final_cls
    accurcy = check_accurcy(test_data, final_cls)
    return accurcy
Beispiel #3
0
def rd_fr_classify(tran_data, test_data):
    '''
    Classify test_data with a random forest trained on tran_data and
    return the accuracy of the plain (unweighted) majority vote.
    '''
    forests = random_fr(tran_data)

    # One prediction list per tree, then transpose to one row per sample.
    per_tree = [tree.classify(test_data) for tree in forests]
    per_sample = map(list, zip(*per_tree))

    # Each sample's label is the most common vote among the trees.
    cls = [collections.Counter(votes).most_common(1)[0][0]
           for votes in per_sample]

    accurcy = check_accurcy(test_data, cls)
    return accurcy
Beispiel #4
0
def rd_fr_classify(tran_data, test_data):
    '''
    Classify test_data with a random forest trained on tran_data.

    Every tree classifies every test sample; a sample's final label is the
    plain (unweighted) majority vote.  Returns the resulting accuracy.
    '''
    forests = random_fr(tran_data)
    res_clses = []  # one prediction list per tree
    cls = []        # final majority-vote label per test sample

    for tree in forests:
        res_clses.append(tree.classify(test_data))

    # Transpose so each row holds all trees' votes for one sample.
    clses_T = map(list, zip(*res_clses))

    for c in clses_T:
        # Most common label among the trees' votes for this sample.
        vote_cls = collections.Counter(c).most_common(1)[0][0]
        cls.append(vote_cls)

    accurcy = check_accurcy(test_data, cls)
    return accurcy
Beispiel #5
0
            self.leaf_err_sum(cur_node, leaf_err_set)
            leaf_e_sum  = sum(leaf_err_set) + 0.5 * len(leaf_err_set)
            leaf_err_ratio =  leaf_e_sum / len(cur_node.dataset)
            std_dev = np.sqrt(leaf_err_ratio * (1 - leaf_err_ratio))

            if leaf_e_sum + std_dev > cur_err_sum:
                print leaf_e_sum + std_dev, cur_err_sum, "  prun!!!!"
                cur_node.childNode = {}
                cur_node.cls = get_cls_from_data(cur_node.dataset)
            else:
                for _, c in cur_node.childNode.items():
                    self.__prun_tree(c)

    def prun_tree(self):
        '''Prune the whole tree in place, starting from the root node.'''
        self.__prun_tree(self.root)


if __name__ == '__main__':
    #dataset =  read_data("test.txt")
    #dataset =  read_data("breast-cancer-assignment5.txt")
    dataset =  read_data("german-assignment5.txt")
    attr_set = range(len(dataset[0]))
    DiscType =  get_disc_val(dataset)
    decisin_tree = DecisionTree(dataset[1:],attr_set, DiscType)
    #decisin_tree.prun_tree()
    res_cls = decisin_tree.classify(dataset[1:])
    #res_cls = decisin_tree.classify(dataset[1:])
    #print res_cls
    acc = check_accurcy(dataset[1:], res_cls)
    print acc
Beispiel #6
0
            self.leaf_err_sum(cur_node, leaf_err_set)
            leaf_e_sum = sum(leaf_err_set) + 0.5 * len(leaf_err_set)
            leaf_err_ratio = leaf_e_sum / len(cur_node.dataset)
            std_dev = np.sqrt(leaf_err_ratio * (1 - leaf_err_ratio))

            if leaf_e_sum + std_dev > cur_err_sum:
                print leaf_e_sum + std_dev, cur_err_sum, "  prun!!!!"
                cur_node.childNode = {}
                cur_node.cls = get_cls_from_data(cur_node.dataset)
            else:
                for _, c in cur_node.childNode.items():
                    self.__prun_tree(c)

    def prun_tree(self):
        '''Prune the whole tree in place, starting from the root node.'''
        self.__prun_tree(self.root)


if __name__ == '__main__':
    #dataset =  read_data("test.txt")
    #dataset =  read_data("breast-cancer-assignment5.txt")
    # Row 0 supplies the column count; rows 1+ are the training samples.
    dataset = read_data("german-assignment5.txt")
    attr_set = range(len(dataset[0]))  # one attribute index per column
    # presumably the discrete value domains per attribute — confirm
    # against get_disc_val's definition.
    DiscType = get_disc_val(dataset)
    decisin_tree = DecisionTree(dataset[1:], attr_set, DiscType)
    #decisin_tree.prun_tree()
    # NOTE(review): classification runs on the training rows themselves,
    # so the printed accuracy is a resubstitution estimate.
    res_cls = decisin_tree.classify(dataset[1:])
    #res_cls = decisin_tree.classify(dataset[1:])
    #print res_cls
    acc = check_accurcy(dataset[1:], res_cls)
    print acc