def fit(self, X, y):
    """Train ``self.n_trees`` decision trees, one per resampled dataset.

    ``self.trees`` is reset to an empty list and then receives one fitted
    ``Decision_Tree`` per iteration. Every tree is configured from
    ``self.min_sample_split``, ``self.max_depth`` and ``self.n_feats``.

    Args:
        X: Training features, passed to ``bootstrap_sample``.
        y: Training targets, passed to ``bootstrap_sample``.
    """
    self.trees = []
    for _ in range(self.n_trees):
        tree_options = {
            "min_samples_split": self.min_sample_split,
            "max_depth": self.max_depth,
            "n_feats": self.n_feats,
        }
        estimator = Decision_Tree(**tree_options)
        # Each tree is fitted on its own draw from bootstrap_sample.
        drawn_X, drawn_y = bootstrap_sample(X, y)
        estimator.fit(drawn_X, drawn_y)
        self.trees.append(estimator)
def decision_tree_test():
    """Fit a depth-5 ``Decision_Tree`` on six columns of the ``load_boston`` data.

    Returns:
        The tree's predictions for the last 20 rows of the selected feature
        matrix. Those rows are also part of the data the tree was fitted on.
    """
    # NOTE(review): presumably sklearn.datasets.load_boston, which was removed
    # in scikit-learn 1.2 -- confirm the pinned version still provides it.
    boston = load_boston()
    all_features = boston.data
    targets = boston.target

    # Only this subset of columns is used for fitting and prediction.
    selected_columns = [0, 2, 4, 5, 7, 11]
    features = all_features[:, selected_columns]

    regressor = Decision_Tree(max_depth=5)
    regressor.fit(features, targets)
    return regressor.predict(features[-20:])
def fit(self, X, Y):
    """Fit ``self.n_trees`` Gini classification trees and store them in ``self.tree``.

    Args:
        X: Training features, passed to ``boost_trap``.
        Y: Training labels, passed to ``boost_trap``.
    """
    # Train every decision tree in turn.
    self.tree = []
    for _ in range(self.n_trees):
        # presumably boost_trap draws a bootstrap resample -- verify against its definition
        sampled_X, sampled_Y = boost_trap(X, Y)
        member = Decision_Tree(
            max_depth=self.max_depth,
            classifier=True,
            Loss="Gini",
        )
        member.fit(sampled_X, sampled_Y)
        self.tree.append(member)
def decision_tree(self, training_data, training_labels, testing_data):
    """Build a decision tree from the training set and predict each test entry.

    The tree is constructed directly from ``training_data`` and
    ``training_labels``, printed via ``print_tree_dfs()``, and then queried
    once per index of ``testing_data``.

    Args:
        training_data: Training examples handed to ``Decision_Tree``.
        training_labels: Labels handed to ``Decision_Tree``.
        testing_data: Indexable collection of examples to predict.

    Returns:
        A list with one prediction per entry of ``testing_data``, in order.
    """
    # Create and build the decision tree.
    model = Decision_Tree(training_data, training_labels)
    model.print_tree_dfs()

    # To try a category never seen during training, predict on e.g.
    # ["low", "high", "high", "high", "high", "high", "high", "potato"].

    # Index-based access is kept because the container type is not known here.
    return [
        model.predict(testing_data[position])
        for position in range(len(testing_data))
    ]
from decision_tree import Decision_Tree
from sklearn.datasets import load_iris
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn import tree

# Exercise the custom Decision_Tree class on the Iris dataset.
dataset = load_iris()
X, y = dataset.data, dataset.target
clf_iris = Decision_Tree(max_depth=5)

# Turn the integer targets into string labels to check that non-integer
# classes are handled: targets 1 and 2 become "one", everything else "zero".
y = np.array(["one" if val in (1, 2) else "zero" for val in y])

# Non-integer targets are ordinally encoded back to integers before fitting.
if "int" not in str(y.dtype):
    encoder = OrdinalEncoder()
    # The encoder is fed a single-column 2-D array, so reshape first.
    y = encoder.fit_transform(y.reshape(-1, 1)).astype(int)
    # Flatten back to one dimension.
    y = y.reshape(y.size,)

clf_iris.fit(X, y)

# Hand-written single-row samples with four feature values each.
temp1 = np.array([[3, 2, 1, .5]])
temp2 = np.array([[4, 2.9, 1.3, .2]])
temp3 = np.array([[3.8, 3, 1.4, .4]])
temp4 = np.array([[7.7, 2.8, 6.7, 2]])
from read_file import Read_File
from data_processor import Data_Processor
from svm import SVM
from decision_tree import Decision_Tree

# Load the census-income sample and split it into features and labels.
fileName = 'data/census-income_10percentData.csv'
file_reader = Read_File(fileName)
file_reader.read()
features = file_reader.get_features()
labels = file_reader.get_labels()

# Fill empty fields before modelling.
# NOTE(review): presumably this mutates `features` in place, since the return
# value is not used -- confirm against Data_Processor.
data_fill = Data_Processor(features)
data_fill.fill_empty_fields()

# Run the SVM task: info gain, 10-fold stratified split, fit, then plot.
my_svm = SVM(features, labels)
my_svm.calculate_info_gain()
my_svm.stratified_k_fold(10)
my_svm.svm()
my_svm.draw_svm()

# Run the decision-tree task once for every argument value from 1 to 13.
my_tree = Decision_Tree(features, labels)
my_tree.calculate_info_gain()
for i in range(1, 14):
    # Parenthesised single-argument print gives the same output on Python 2
    # and is valid syntax on Python 3, unlike the bare print statement.
    print("============================{}============================".format(i))
    my_tree.decision(i)
def decisionTree_class():
    """Fit a Gini ``Decision_Tree`` classifier on ``make_blobs`` data.

    The data comes from ``make_blobs`` with 100 samples, 10 centers,
    10 features and ``random_state=5``. The tree is limited to depth 5 and is
    evaluated on the same samples it was fitted on.

    Returns:
        A ``(predicted_labels, true_labels)`` tuple.
    """
    features, labels = make_blobs(
        n_samples=100,
        centers=10,
        n_features=10,
        random_state=5,
    )
    classifier = Decision_Tree(max_depth=5, classifier=True, Loss="Gini")
    classifier.fit(features, labels)
    predicted = classifier.predict(features)
    return predicted, labels
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from decision_tree import Decision_Tree


def accuracy(y_true, y_pred):
    """Return the share of positions where ``y_true`` equals ``y_pred``."""
    matches = np.sum(y_true == y_pred)
    return matches / len(y_true)


# Load the breast-cancer dataset and hold out 20% of it for testing.
data = datasets.load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# Fit a tree limited to depth 10 and score it on the held-out split.
clf = Decision_Tree(max_depth=10)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy(y_test, y_pred)
print("Accuracy:", acc)
{ # Choosing month of date "date": row[0].split("/")[0], # log10 of confirmed "confirmed": int(math.log10(int(row[1]))), # log10 of recovered "recovered": int(math.log10(int(row[2]))), # log10 of deaths "deaths": int(math.log10(int(row[3]))), }, discretise_target(row[4])) examples.append(data) # Shuffling for randomness random.shuffle(examples) tre = list() tee = list() # Split the data 80/20 split_index = (int)((80 / 100) * len(examples)) tre = examples[:split_index] tee = examples[split_index:] decision_tree = Decision_Tree(tre, depth, pruning) print("DECISION TREE") print(decision_tree) print("MAXIMUM DEPTH REACHED") print(decision_tree.depth_reached) print("ACCURACY OVER TESTING") print(decision_tree.test_accuracy(tee) * 100, "%")