def fit(self, X, y):
    """Fit the ensemble: train ``self.n_trees`` trees and keep them in ``self.trees``.

    Each call starts from an empty ``self.trees`` list, so refitting
    discards any trees stored by an earlier call. Every tree is fitted on
    its own ``bootstrap_sample(X, y)`` draw (helper defined elsewhere;
    presumably a resample of the rows -- confirm).

    Args:
        X: Feature data, handed unchanged to ``bootstrap_sample``.
        y: Targets, handed unchanged to ``bootstrap_sample``.
    """
    self.trees = []
    for _ in range(self.n_trees):
        # The tree is constructed before its sample is drawn.
        # Note the spelling: the tree keyword is ``min_samples_split`` while
        # the attribute read from this object is ``min_sample_split``.
        learner = Decision_Tree(
            min_samples_split=self.min_sample_split,
            max_depth=self.max_depth,
            n_feats=self.n_feats,
        )
        sample_X, sample_y = bootstrap_sample(X, y)
        learner.fit(sample_X, sample_y)
        self.trees.append(learner)
def decision_tree_test():
    """Fit a depth-5 ``Decision_Tree`` on six Boston columns and predict the tail.

    The tree is trained on every row of the selected columns and then asked
    to predict the last 20 of those same rows.

    Returns:
        Whatever ``Decision_Tree.predict`` returns for the final 20 rows.
    """
    # NOTE(review): load_boston was removed from recent scikit-learn
    # releases -- confirm the pinned version still provides it.
    boston = load_boston()
    all_features = boston.data
    targets = boston.target

    # Only columns 0, 2, 4, 5, 7 and 11 are used as features.
    features = all_features[:, [0, 2, 4, 5, 7, 11]]

    model = Decision_Tree(max_depth=5)
    model.fit(features, targets)
    return model.predict(features[-20:])
def fit(self, X, Y):
    """Train ``self.n_trees`` Gini classification trees and store them in ``self.tree``.

    ``self.tree`` is reset to an empty list first, so trees from a previous
    call are dropped. For each tree a sample is drawn with
    ``boost_trap(X, Y)`` (helper defined elsewhere; presumably a bootstrap
    resample -- confirm) and the tree is fitted on that sample.

    Args:
        X: Feature data, passed unchanged to ``boost_trap``.
        Y: Labels, passed unchanged to ``boost_trap``.
    """
    self.tree = []
    # Train the decision trees one after another.
    for _ in range(self.n_trees):
        # The sample is drawn before the tree is constructed.
        sample_X, sample_Y = boost_trap(X, Y)
        member = Decision_Tree(
            max_depth=self.max_depth,
            classifier=True,
            Loss="Gini",
        )
        member.fit(sample_X, sample_Y)
        self.tree.append(member)
# Smoke test: fit the custom Decision_Tree on Iris using string labels, then
# compare its prediction for one sample with scikit-learn's classifier.
dataset = load_iris()
X, y = dataset.data, dataset.target
clf_iris = Decision_Tree(max_depth=5)

# Replace the integer targets with strings to exercise non-integer labels:
# values 1 and 2 become "one", everything else becomes "zero".
y = np.array(["one" if val in (1, 2) else "zero" for val in y])

# Labels that are not integers are ordinally encoded to integers.
if y.dtype.kind not in ("i", "u"):
    encoder = OrdinalEncoder()
    # The encoder is fed a single 2-D column; the result is cast to int and
    # flattened back to 1-D.
    y = encoder.fit_transform(y.reshape(-1, 1)).astype(int).reshape(-1)

clf_iris.fit(X, y)

# Hand-written single-row samples (only temp1 is used below).
temp1 = np.array([[3, 2, 1, 0.5]])
temp2 = np.array([[4, 2.9, 1.3, 0.2]])
temp3 = np.array([[3.8, 3, 1.4, 0.4]])
temp4 = np.array([[7.7, 2.8, 6.7, 2]])

# Prediction of the custom tree for temp1
print("------------------------------------------------------")
print(f"My Iris prediction for {temp1}:\n", clf_iris.predict(temp1))
print("------------------------------------------------------")

# Same sample through scikit-learn's tree, trained on the same encoded labels
skl_clf_iris = DTC(splitter="best", random_state=42, max_depth=5)
skl_clf_iris.fit(X, y)
skl_preds_iris = skl_clf_iris.predict(temp1)
print(f"SKLearn Iris prediction for {temp1}:\n", skl_preds_iris)
def decisionTree_class():
    """Fit a Gini classification tree on synthetic blobs and predict the training set.

    The data comes from ``make_blobs`` with 100 samples, 10 centers,
    10 features and ``random_state=5``; the tree is limited to depth 5.

    Returns:
        A ``(predictions, labels)`` pair: the tree's predictions for the
        training features, and the labels produced by ``make_blobs``.
    """
    features, labels = make_blobs(
        n_samples=100,
        centers=10,
        n_features=10,
        random_state=5,
    )
    classifier = Decision_Tree(max_depth=5, classifier=True, Loss="Gini")
    classifier.fit(features, labels)
    return classifier.predict(features), labels
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from decision_tree import Decision_Tree


def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both arguments are converted with ``np.asarray`` before comparing, so
    plain Python lists/tuples are compared element by element. (Without the
    conversion, ``list == list`` is a single bool and the result would be
    ``0`` or ``1 / len(y_true)`` regardless of how many entries match.)

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like), same length as ``y_true``.

    Returns:
        Number of matching entries divided by ``len(y_true)``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.sum(y_true == y_pred) / len(y_true)


# Demo: train the custom tree on the breast-cancer dataset and report the
# accuracy on a held-out 20% split.
data = datasets.load_breast_cancer()
X = data.data
y = data.target

# Fixed random_state so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

clf = Decision_Tree(max_depth=10)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
acc = accuracy(y_test, y_pred)

print("Accuracy:", acc)