def sk_learn(data="oldGames.arff", min_split=300, min_leaf=15): folds = 10 mat = Arff(data, label_count=1) counts = [] ## this is so you know how many types for each column for i in range(mat.data.shape[1]): counts += [mat.unique_value_count(i)] # np.random.seed(35) np.random.shuffle(mat.data) splits = np.array_split(mat.data, folds) Acc = 0 # min_split = 300 # print("Minsplit: {}".format(min_split)) for f in range(folds): # print("Fold {}:".format(f)) train = np.array([]) for other in range(folds): if train.size == 0 and other != f: train = splits[other].copy() elif other != f: train = np.concatenate((train, splits[other])) data = train[:, 0:-1] labels = train[:, -1].reshape(-1, 1) clf = tree.DecisionTreeClassifier( ) #min_samples_split=min_split, min_samples_leaf=min_leaf clf = clf.fit(data, labels) pred = clf.predict(data) new_acc = score(pred, labels) # print("\tTrain Acc {}".format(new_acc)) data2 = splits[f][:, 0:-1] labels2 = splits[f][:, -1].reshape(-1, 1) pred = clf.predict(data2) new_acc = score(pred, labels2) # print("\tTest Acc {}".format(new_acc)) Acc += new_acc Acc = Acc / folds print("Accuracy = [{:.4f}]".format(Acc)) classes = [ "Overwhelmingly_Positive", "Very_Positive", "Positive", "Mostly_Positive", "Mixed", "Mostly_Negative", "Negative", "Very_Negative", "Overwhelmingly_Negative" ] dot_data = tree.export_graphviz(clf, out_file=None, feature_names=mat.get_attr_names()[:-1], class_names=classes, filled=True, rounded=True) # max_depth=6, graph = graphviz.Source(dot_data) graph.render("old_games") return Acc
""" size = X.shape[0] score = 0 predicted = self.predict(X) for i in range(size): if predicted[i] == y[i]: score += 1 #print("score is ", score/size) return score/size if __name__ == "__main__": mat = Arff("car.arff",label_count=1) counts = [] ## this is so you know how many types for each column for i in range(mat.data.shape[1]): counts += [mat.unique_value_count(i)] #print(counts) data = mat.data[:,0:-1] labels = mat.data[:,-1].reshape(-1,1) #removing nans median = np.median(data) roundedMedian = np.round_(median, 0) data[np.isnan(data)]= roundedMedian DTClass = DTClassifier(counts) DTClass.fit(data,labels, counts) acc = DTClass.score(data, labels) print("acc ", acc) clf = DecisionTreeClassifier(random_state=0)