Ejemplo n.º 1
0
def voting():
    """Cross-validate a decision tree on the voting dataset and report it.

    Steps performed:

    1. Load ``./voting.arff`` and split it into features (all columns but
       the last) and labels (the last column).
    2. Hold out 25% of the rows as a test set via ``train_test_split``.
    3. Run 10-fold cross-validation on the remaining rows, fitting a fresh
       ``DTClassifier`` per fold and recording its score on both the
       validation fold and the training folds.
    4. Write the scores to ``voting.csv``: row 0 holds the validation
       scores, row 1 the training scores, and the last column of each row
       is the average over the folds.
    5. Print the scores, the average validation score, and the held-out
       test score of the tree with the highest validation score.
    6. Write that same best tree's graph description to ``voting_tree``.

    The function takes no arguments and returns ``None``; its results are
    the printed output and the two files it writes.
    """
    print("----------------voting------------------")

    mat = Arff("./voting.arff", label_count=1)
    splits = 10
    kfolder = KFold(n_splits=splits)

    # Labels are reshaped to a column vector (n, 1) before splitting.
    data, tData, labels, tLabels = train_test_split(
        mat.data[:, :-1],
        mat.data[:, -1].reshape(-1, 1),
        test_size=.25)

    validation_scores = []
    training_scores = []

    # Start below any possible score so the first fold is always accepted;
    # this guarantees best_dtree is set even when every fold scores 0.
    best_score = float("-inf")
    best_dtree = None
    for train, validate in kfolder.split(data, labels):
        dtree = DTClassifier(features=mat.get_attr_names())
        dtree.fit(data[train], labels[train])

        validation_scores.append(
            dtree.score(data[validate], labels[validate]))
        training_scores.append(dtree.score(data[train], labels[train]))

        # Strict '>' keeps the earliest fold on ties.
        if validation_scores[-1] > best_score:
            best_score = validation_scores[-1]
            best_dtree = dtree

    # Append each row's average as its final column.
    validation_scores.append(np.mean(validation_scores))
    training_scores.append(np.mean(training_scores))
    scores = [validation_scores, training_scores]

    # Header columns use the same delimiter as the data rows.
    header_text = ','.join([str(fold) for fold in range(splits)] + ['average'])
    np.savetxt("voting.csv",
               scores,
               header=header_text,
               delimiter=',')
    print(scores)
    print('Average CV accuracy: {:.2f}'.format(scores[0][-1]))
    print('Best tree accuracy: {:.2f}'.format(best_dtree.score(
        tData, tLabels)))

    # Export the best tree (the one reported above), not whichever tree the
    # last fold happened to produce; 'with' closes the file on error too.
    with open("voting_tree", "w") as f:
        f.write(best_dtree.graph(
            class_translator=lambda x: mat.attr_value(-1, x)))