def test_prediction_scores(self):
    """Validate the fitted classifier's predictions on the held-out data.

    Verifies that ``self.clf.predict`` returns one label per row of
    ``self.X_test`` and that the resulting accuracy against
    ``self.y_test`` is strictly greater than ``self.accuracy_floor``.
    The ``evaluate_print`` helper is also called so that it gets
    exercised by the test run.
    """
    predictions = self.clf.predict(self.X_test)

    # One predicted label is expected for every test sample.
    n_expected = self.X_test.shape[0]
    assert_equal(len(predictions), n_expected)

    # Accuracy has to beat the floor configured on the test case.
    score = accuracy_score(self.y_test, predictions)
    assert_greater(score, self.accuracy_floor)

    # Smoke-test the reporting utility with the same predictions.
    evaluate_print('averaging', self.y_test, predictions)
X, y, test_size=0.4, random_state=random_state) # initialize a group of clfs classifiers = [ DecisionTreeClassifier(random_state=random_state), LogisticRegression(random_state=random_state), KNeighborsClassifier(), RandomForestClassifier(random_state=random_state), GradientBoostingClassifier(random_state=random_state) ] clf_names = ['DT', 'LR', 'KNN', 'RF', 'GBDT'] # evaluate individual classifiers for i, clf in enumerate(classifiers): clf.fit(X_train, y_train) y_test_predict = clf.predict(X_test) evaluate_print(clf_names[i] + ' | ', y_test, y_test_predict) print() # build a Stacking model and evaluate clf = Stacking(classifiers, n_folds=4, shuffle_data=False, keep_original=True, use_proba=False, random_state=random_state) clf.fit(X_train, y_train) y_test_predict = clf.predict(X_test) evaluate_print('Stacking | ', y_test, y_test_predict)
random_state = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=random_state)

# Group of base classifiers. Nothing in this span reads the list;
# presumably later code combines them -- confirm against the full file.
classifiers = [
    DecisionTreeClassifier(random_state=random_state),
    LogisticRegression(random_state=random_state),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=random_state),
    GradientBoostingClassifier(random_state=random_state),
]

# Baselines: (report label, estimator class, constructor keyword arguments).
# Each estimator is built, fitted and reported in turn, in this order.
seeded = {'random_state': random_state}
baselines = (
    ('Decision Tree |', DecisionTreeClassifier, seeded),
    ('Logistic Regression |', LogisticRegression, seeded),
    ('K Neighbors |', KNeighborsClassifier, {}),
    ('Gradient Boosting |', GradientBoostingClassifier, seeded),
)
for label, estimator_cls, params in baselines:
    clf = estimator_cls(**params)
    clf.fit(X_train, y_train)
    evaluate_print(label, y_test, clf.predict(X_test))

# The random forest is only fitted here; no report for it appears in
# this span, and ``clf`` is left bound to the fitted forest.
clf = RandomForestClassifier(random_state=random_state)
clf.fit(X_train, y_train)
import warnings

# Install a blanket "ignore" filter so no warnings are shown by this script.
warnings.filterwarnings("ignore")

if __name__ == "__main__":
    # Read the feature matrix X and the label vector y.
    random_state = 42
    X, y = load_breast_cancer(return_X_y=True)

    # Seeded split with test_size=0.4.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # Individual classifiers: (report label, estimator class, constructor
    # keyword arguments). Each one is built, fitted on the training split
    # and reported on the test split before the next one is created.
    seeded = {'random_state': random_state}
    individual_models = (
        ('Decision Tree |', DecisionTreeClassifier, seeded),
        ('Logistic Regression |', LogisticRegression, seeded),
        ('K Neighbors |', KNeighborsClassifier, {}),
        ('Gradient Boosting |', GradientBoostingClassifier, seeded),
    )
    for label, estimator_cls, params in individual_models:
        clf = estimator_cls(**params)
        clf.fit(X_train, y_train)
        evaluate_print(label, y_test, clf.predict(X_test))

    # The random forest is fitted last; this span contains no report
    # call for it, and ``clf`` stays bound to the fitted forest.
    clf = RandomForestClassifier(random_state=random_state)
    clf.fit(X_train, y_train)