def basic():
    """Fit a DTClassifier on a small hard-coded pizza table and print the results.

    The last column of the table is used as the label; the remaining columns
    are the inputs. A single hand-written sample is then predicted and scored
    against the label 'Good', and the tree and its graph are printed.
    """
    print("---------------basic--------------")

    # Each row: three attribute values followed by the class label.
    rows = [
        ['Y', 'Thin', 'N', 'Great'],
        ['N', 'Deep', 'N', 'Bad'],
        ['N', 'Stuffed', 'Y', 'Good'],
        ['Y', 'Stuffed', 'Y', 'Great'],
        ['Y', 'Deep', 'N', 'Good'],
        ['Y', 'Deep', 'Y', 'Great'],
        ['N', 'Thin', 'Y', 'Good'],
        ['Y', 'Deep', 'N', 'Good'],
        ['N', 'Thin', 'N', 'Bad'],
    ]
    table = np.array(rows)

    # Split off the final column as an (n, 1) label array.
    inputs = table[:, :-1]
    targets = table[:, -1].reshape(-1, 1)

    feature_names = ["Meat", "Crust", "Veggies", "Classification"]
    model = DTClassifier(features=feature_names)
    model.fit(inputs, targets)

    sample = [['Y', 'Deep', 'N']]
    predicted = model.predict(sample)

    print(model.tree)
    print(predicted)
    print(model.score(sample, [['Good']]))
    print(model.graph())
def voting():
    """Cross-validate a DTClassifier on the voting data and save the results.

    Loads "./voting.arff", holds out 25% of the rows as a test set, and runs
    10-fold cross-validation on the remainder. For each fold the validation
    and training scores are recorded, and the tree with the highest validation
    score is remembered.

    Side effects:
        * Writes per-fold scores plus their averages to "voting.csv"
          (row 0: validation scores, row 1: training scores).
        * Prints the score table, the average validation score, and the
          remembered tree's score on the held-out test set.
        * Writes the remembered tree's graph to the file "voting_tree".
    """
    print("----------------voting------------------")
    mat = Arff("./voting.arff", label_count=1)

    splits = 10
    kfolder = KFold(n_splits=splits)
    # scores[0] holds validation scores, scores[1] holds training scores.
    scores = [[], []]
    data, tData, labels, tLabels = train_test_split(
        mat.data[:, :-1], mat.data[:, -1].reshape(-1, 1), test_size=.25)

    # Start below zero so the first fold is always accepted; otherwise a run
    # where every fold scores 0 would leave best_tree as None and crash below.
    # NOTE(review): assumes DTClassifier.score() never returns a negative value.
    best_score, best_tree = -1.0, None
    for train, validate in kfolder.split(data, labels):
        dtree = DTClassifier(features=mat.get_attr_names())
        dtree.fit(data[train], labels[train])
        fold_score = dtree.score(data[validate], labels[validate])
        scores[0].append(fold_score)
        scores[1].append(dtree.score(data[train], labels[train]))
        if fold_score > best_score:
            best_score, best_tree = fold_score, dtree

    # Append each row's mean as a final column.
    average = np.sum(scores, axis=1) / splits
    scores[0].append(average[0])
    scores[1].append(average[1])

    # Header is "0 1 ... 9 average": one label per fold, then the mean column.
    header_text = ''.join(str(fold) + ' ' for fold in range(splits))
    np.savetxt("voting.csv", scores, header=header_text + 'average',
               delimiter=',')
    print(scores)
    print('Average CV accuracy: {:.2f}'.format(scores[0][-1]))
    print('Best tree accuracy: {:.2f}'.format(best_tree.score(
        tData, tLabels)))

    # Save the same tree whose accuracy was just reported (not merely the tree
    # from the last fold), and let the context manager close the file even if
    # graph() or write() raises.
    with open("voting_tree", "w") as f:
        f.write(best_tree.graph(
            class_translator=lambda x: mat.attr_value(-1, x)))