def __init__(self, n_estimators=100, max_features=None, min_samples_split=2,
             min_gain=0, max_depth=float("inf")):
    '''
    :param n_estimators: number of trees in the forest
    :param max_features: maximum number of features considered per tree
    :param min_samples_split: minimum samples required to split a node (passed to each decision tree)
    :param min_gain: minimum information gain required to continue splitting (passed to each decision tree)
    :param max_depth: maximum depth of each tree (passed to each decision tree)
    '''
    self.n_estimators = n_estimators
    self.max_features = max_features
    self.min_samples_split = min_samples_split
    self.min_gain = min_gain
    self.max_depth = max_depth
    self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)
    self.trees_feature_idx = []

    # Initialize the list of trees
    self.trees = []
    for _ in range(self.n_estimators):
        self.trees.append(
            ClassificationTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_gain,
                max_depth=self.max_depth))
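# A minimal sketch (not the repo's actual method) of how a fit() for this class
# could fill trees_feature_idx: bootstrap the rows and draw a random feature
# subset per tree. Plain numpy sampling is assumed, and the sqrt(n_features)
# fallback is a common heuristic, not something the source specifies.
import numpy as np

def fit(self, X, y):
    n_samples, n_features = np.shape(X)
    # Fall back to the usual sqrt heuristic when max_features is unset
    max_features = self.max_features or int(np.sqrt(n_features))
    for i in self.progressbar(range(self.n_estimators)):
        # Bootstrap sample of the rows (with replacement)
        sample_idx = np.random.choice(n_samples, size=n_samples, replace=True)
        # Random feature subset for this tree (without replacement)
        feature_idx = np.random.choice(n_features, size=max_features, replace=False)
        self.trees_feature_idx.append(feature_idx)
        self.trees[i].fit(X[sample_idx][:, feature_idx], y[sample_idx])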
def main():
    print('-- Classification Tree --')

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print('Accuracy:', accuracy)

    # Benchmark against sklearn's decision tree
    dt = DecisionTreeClassifier()
    dt.fit(X_train, y_train)
    y_val = dt.predict(X_test)
    acc = accuracy_score(y_test, y_val)
    print('sklearn score:', acc)
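# Note: accuracy here varies run to run because the split is shuffled. If
# sklearn's train_test_split is the one imported, fixing random_state makes
# the comparison repeatable (the repo's own helper, if that is what is used,
# may take a seed argument instead):
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)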
def __init__(self, n_estimators=50, max_features=None, min_samples_split=2,
             min_gain=1e-7, max_depth=float("inf"), debug=False):
    self.n_estimators = n_estimators            # Number of trees
    self.max_features = max_features            # Maximum number of features per tree
    self.feature_indices = []                   # The indices of the features used for each tree
    self.min_samples_split = min_samples_split
    self.min_gain = min_gain                    # Minimum information gain req. to continue
    self.max_depth = max_depth                  # Maximum depth for tree
    self.debug = debug

    # Initialize decision trees
    self.trees = []
    for _ in range(n_estimators):
        self.trees.append(
            ClassificationTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_gain,
                max_depth=self.max_depth))
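# A sketch of majority-vote prediction over the per-tree subsets stored in
# feature_indices; the repo's own predict() may be structured differently.
import numpy as np

def predict(self, X):
    # Row i of all_preds holds tree i's predictions for every sample
    all_preds = np.array([
        tree.predict(X[:, idx])
        for tree, idx in zip(self.trees, self.feature_indices)])
    y_pred = []
    for sample_preds in all_preds.T:  # one column per sample
        values, counts = np.unique(sample_preds, return_counts=True)
        y_pred.append(values[np.argmax(counts)])  # most common vote wins
    return np.array(y_pred)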
def __init__(self, n_estimators=100, min_samples_split=2, min_gain=0,
             max_depth=float("inf"), max_features=None):
    self.n_estimators = n_estimators
    self.min_samples_split = min_samples_split
    self.min_gain = min_gain
    self.max_depth = max_depth
    self.max_features = max_features

    self.trees = []
    # Build the forest
    for _ in range(self.n_estimators):
        tree = ClassificationTree(min_samples_split=self.min_samples_split,
                                  min_impurity=self.min_gain,
                                  max_depth=self.max_depth,
                                  max_features=max_features)
        self.trees.append(tree)
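# Hypothetical usage of this variant, assuming it exposes the usual
# fit()/predict() pair; the data handling mirrors the main() functions here.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.4)

forest = RandomForest(n_estimators=100, max_features=2)
forest.fit(X_train, y_train)
print("Forest accuracy:", accuracy_score(y_test, forest.predict(X_test)))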
def main(): print("-- Classification Tree --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = ClassificationTree(max_features=2) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Our DTClassifier Accuracy:", accuracy) clf = tree.DecisionTreeClassifier() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("sklearn DTClassifier Accuracy:", accuracy)
        column = np.hstack((column, float(row['director_facebook_likes'])))
        column = np.hstack((column, float(row['actor_1_facebook_likes'])))
        column = np.hstack((column, float(row['actor_2_facebook_likes'])))
        column = np.hstack((column, float(row['actor_3_facebook_likes'])))
        column = np.hstack((column, float(row['budget'])))
        column = np.hstack((column, float(row['facenumber_in_poster'])))

        target = int(float(row['imdb_score']))
        if 4 <= target <= 8:
            y = np.vstack((y, [target]))   # scores
            X = np.vstack((X, column))     # features
    except Exception:
        # Skip rows with missing or malformed fields
        pass

model = ClassificationTree()

X = standardize(X)
# Alternative scalers:
# X = RobustScaler(quantile_range=(25, 75)).fit_transform(X)
# X = MinMaxScaler().fit_transform(X)

datasets = k_fold_cross_validation_sets(X, y, 3)
for data in datasets:
    X_train, X_test, y_train, y_test = data
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)
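# k_fold_cross_validation_sets comes from the repo's utilities; below is a
# plausible numpy-only equivalent of what it appears to return. This is an
# assumption about the helper's behavior, not its actual source.
import numpy as np

def k_fold_cross_validation_sets(X, y, k):
    # Shuffle once, split the indices into k folds, hold out each fold in turn
    idx = np.random.permutation(len(X))
    folds = np.array_split(idx, k)
    sets = []
    for i in range(k):
        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        sets.append((X[train_idx], X[test_idx], y[train_idx], y[test_idx]))
    return sets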
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale labels to {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)
lda = LDA()

# ........
#  TRAIN
# ........
print("Training:")
print("\tAdaboost")
adaboost.fit(X_train, rescaled_y_train)
print("\tNaive Bayes")
naive_bayes.fit(X_train, y_train)
print("\tLogistic Regression")
logistic_regression.fit(X_train, y_train)
print("\tLDA")
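# The rescaling above maps {0, 1} labels onto the {-1, 1} convention that
# Adaboost (and the SVM) expect. A quick check:
import numpy as np
y = np.array([0, 1, 1, 0])
print(2 * y - np.ones(np.shape(y)))  # [-1.  1.  1. -1.]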
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale labels to {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20, n_iterations=20000, learning_rate=0.1)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=50)
support_vector_machine = SupportVectorMachine()
lda = LDA()
gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=0.9, max_depth=2)
xgboost = XGBoost(n_estimators=50, learning_rate=0.5)

# ........
#  TRAIN
# ........
print("Training:")
print("\tAdaboost")
adaboost.fit(X_train, rescaled_y_train)
print("\tDecision Tree")
decision_tree.fit(X_train, y_train)
print("\tGradient Boosting")
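# A hypothetical evaluation loop for once the fitting above completes, scoring
# each model against the label convention it was trained with (accuracy_score
# as used elsewhere in these scripts):
models = {
    "Adaboost": (adaboost, rescaled_y_test),
    "Decision Tree": (decision_tree, y_test),
    "Random Forest": (random_forest, y_test),
}
for name, (model, y_true) in models.items():
    y_pred = model.predict(X_test)
    print("%s accuracy: %.3f" % (name, accuracy_score(y_true, y_pred)))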