def test_classifier(dataset):
    """Compare the local gradient-boosting classifier with scikit-learn's.

    Args:
        dataset: 4-tuple unpacked as ``(X_train, X_test, y_train, y_test)``.

    Returns:
        None. The function exits on its first statement, so the comparison
        below never runs.
    """
    # Early exit: everything after this line is currently unreachable.
    return

    X_train, X_test, y_train, y_test = dataset
    shared_options = {"max_depth": 10, "n_estimators": 55}

    # The local implementation's fit() is also handed the test split.
    own_model = GradientBoostingClassifier(**shared_options)
    own_model.fit(X_train, y_train, X_test, y_test)
    print(own_model.score(X_test, y_test))

    # Reference run with the same hyperparameters.
    reference_model = SklearnGradientBoostingClassifier(**shared_options)
    reference_model.fit(X_train, y_train)
    print(reference_model.score(X_test, y_test))
def test_gradient_boosting_classification():
    """Fit the gradient-boosting classifier on iris and print its accuracy.

    Loads the iris data, splits it with ``split_train_test``, trains a
    100-estimator model with learning rate 0.1, and prints the array shapes
    and the accuracy reported by ``cal_accuracy``.
    """
    iris = datasets.load_iris()
    features = iris.data
    labels = iris.target
    print(features.shape, labels.shape)

    # split_train_test returns the four parts in this order.
    train_X, train_y, test_X, test_y = split_train_test(features, labels)
    print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

    booster = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
    booster.fit(train_X, train_y)

    accuracy = cal_accuracy(test_y, booster.predict(test_X))
    print('accuracy: ', accuracy)
def test_real_data(dataset):
    """Plot the staged test loss of the local model against scikit-learn's.

    Both models are trained on the split returned by ``DataLoader().load()``.
    Their staged probability predictions on the test set are passed to
    ``loss``, and the two resulting curves are drawn on one ``DashBoard``.

    Args:
        dataset: Unused; the data always comes from ``DataLoader().load()``.

    Note:
        The two models use different learning rates (0.2 for the local one,
        0.1 for scikit-learn's). This is reproduced as found.
    """
    X_train, X_test, y_train, y_test = DataLoader().load()

    n_rounds = 300
    # Tree and sampling settings common to both models.
    shared_options = {
        "max_depth": 1,
        "min_samples_split": 20,
        "min_samples_leaf": 10,
        "subsample": 0.5,
    }

    # --- local implementation ---
    own_model = GradientBoostingClassifier(
        n_estimators=n_rounds,
        learning_rate=0.2,
        **shared_options
    )
    own_model.fit(X_train, y_train.reshape(-1))
    own_staged = own_model.staged_predict_proba(X_test)
    own_curve = loss(y_test.reshape(-1), own_staged)

    board = DashBoard()
    board.init_graph(
        name="my_one",
        title="my_one",
        line_type="default",
        c="r",
        y=own_curve.reshape(-1),
    )

    # --- scikit-learn baseline ---
    baseline_model = SklearnGradientBoostingClassifier(
        n_estimators=n_rounds,
        criterion="mse",
        learning_rate=0.1,
        init="zero",
        **shared_options
    )
    baseline_model.fit(X_train, y_train)

    # One row per boosting stage, holding column 1 of that stage's
    # predict_proba output.
    baseline_staged = np.empty([n_rounds, X_test.shape[0]])
    for stage, proba in enumerate(baseline_model.staged_predict_proba(X_test)):
        baseline_staged[stage] = proba[:, 1]
    print(y_test.shape, baseline_staged.shape)

    baseline_curve = loss(y_test.reshape(-1), baseline_staged)
    board.init_graph(
        name="baseline",
        title="sklearn",
        line_type="baseline",
        c="g",
        y=baseline_curve.reshape(-1),
    )
    board.make_plot()
def main():
    """Train a gradient-boosting classifier on ``cancer_o.csv`` and print accuracy.

    The ``level`` column is the target and is encoded as the position of each
    label in ``["Low", "Medium", "High"]`` (0, 1, 2). The ``level`` and
    ``patient_id`` columns are dropped from the features. The data is split
    80/20 with ``random_state=42``.

    Raises:
        ValueError: If ``level`` contains a label outside the three known
            classes (raised by ``list.index``).
    """
    print("-- Gradient Boosting Classification --")

    df = pd.read_csv("cancer_o.csv")
    X = df.drop(['level', 'patient_id'], axis=1)

    # Encode the string labels as their position in data_classes.
    # The original first assigned ``y = df.level`` and then overwrote it
    # here before any use; that dead store has been removed.
    data_classes = ["Low", "Medium", "High"]
    y = df['level'].apply(data_classes.index)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    clf = GradientBoostingClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test)) # ....... # SETUP # ....... adaboost = Adaboost(n_clf = 8) naive_bayes = NaiveBayes() knn = KNN(k=4) logistic_regression = LogisticRegression() mlp = MultilayerPerceptron(n_hidden=20) perceptron = Perceptron() decision_tree = ClassificationTree() random_forest = RandomForest(n_estimators=50) support_vector_machine = SupportVectorMachine() lda = LDA() gbc = GradientBoostingClassifier(n_estimators=20, learning_rate=1) xgboost = XGBoost() # ........ # TRAIN # ........ print ("Training:") print ("\tAdaboost") adaboost.fit(X_train, rescaled_y_train) print ("\tDecision Tree") decision_tree.fit(X_train, y_train) print ("\tGradient Boosting") gbc.fit(X_train, y_train) print ("\tLDA") lda.fit(X_train, y_train) print ("\tLogistic Regression")
# ....... # SETUP # ....... adaboost = Adaboost(n_clf=8) naive_bayes = NaiveBayes() knn = KNN(k=4) logistic_regression = LogisticRegression() mlp = MultilayerPerceptron(n_hidden=20, n_iterations=20000, learning_rate=0.1) perceptron = Perceptron() decision_tree = ClassificationTree() random_forest = RandomForest(n_estimators=50) support_vector_machine = SupportVectorMachine() lda = LDA() gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=.9, max_depth=2) xgboost = XGBoost(n_estimators=50, learning_rate=0.5, max_depth=2) # ........ # TRAIN # ........ print("Training:") print("\tAdaboost") adaboost.fit(X_train, rescaled_y_train) print("\tDecision Tree") decision_tree.fit(X_train, y_train) print("\tGradient Boosting") gbc.fit(X_train, y_train) print("\tLDA") lda.fit(X_train, y_train)
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test)) # ....... # SETUP # ....... adaboost = Adaboost(n_clf = 8) naive_bayes = NaiveBayes() knn = KNN(k=4) logistic_regression = LogisticRegression() mlp = MultilayerPerceptron(n_hidden=20, n_iterations=20000, learning_rate=0.1) perceptron = Perceptron() decision_tree = ClassificationTree() random_forest = RandomForest(n_estimators=50) support_vector_machine = SupportVectorMachine() lda = LDA() gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=.9, max_depth=2) xgboost = XGBoost(n_estimators=50, learning_rate=0.5) # ........ # TRAIN # ........ print ("Training:") print ("\tAdaboost") adaboost.fit(X_train, rescaled_y_train) print ("\tDecision Tree") decision_tree.fit(X_train, y_train) print ("\tGradient Boosting") gbc.fit(X_train, y_train) print ("\tLDA") lda.fit(X_train, y_train) print ("\tLogistic Regression")
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test)) # ....... # SETUP # ....... adaboost = Adaboost(n_clf=8) naive_bayes = NaiveBayes() knn = KNN(k=4) logistic_regression = LogisticRegression() mlp = MultilayerPerceptron(n_hidden=20) perceptron = Perceptron() decision_tree = ClassificationTree() random_forest = RandomForest(n_estimators=150) support_vector_machine = SupportVectorMachine() lda = LDA() gbc = GradientBoostingClassifier() # ........ # TRAIN # ........ print("Training:") print("\tAdaboost") adaboost.fit(X_train, rescaled_y_train) print("\tDecision Tree") decision_tree.fit(X_train, y_train) print("\tGradient Boosting") gbc.fit(X_train, y_train) print("\tLDA") lda.fit(X_train, y_train) print("\tLogistic Regression") logistic_regression.fit(X_train, y_train)