Exemple #1
0
    def test_prediction_scores(self):
        """Check the predictions of ``self.clf`` on the test split.

        One prediction is expected per row of ``self.X_test``, the accuracy
        against ``self.y_test`` has to be greater than
        ``self.accuracy_floor``, and the predictions are finally handed to
        ``evaluate_print``.
        """
        predictions = self.clf.predict(self.X_test)
        n_test_rows = self.X_test.shape[0]
        assert_equal(len(predictions), n_test_rows)

        # performance gate
        accuracy = accuracy_score(self.y_test, predictions)
        assert_greater(accuracy, self.accuracy_floor)

        # exercise the reporting helper with the same predictions
        evaluate_print('averaging', self.y_test, predictions)
        X, y, test_size=0.4, random_state=random_state)

    # base estimators and their short display names, kept in matching order
    classifiers = [
        DecisionTreeClassifier(random_state=random_state),
        LogisticRegression(random_state=random_state),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=random_state),
        GradientBoostingClassifier(random_state=random_state),
    ]
    clf_names = ['DT', 'LR', 'KNN', 'RF', 'GBDT']

    # baseline: fit every base estimator on its own and hand its test-set
    # predictions to evaluate_print
    for name, clf in zip(clf_names, classifiers):
        clf.fit(X_train, y_train)
        y_test_predict = clf.predict(X_test)
        evaluate_print(name + '   |   ', y_test, y_test_predict)

    print()
    # combine the same estimator instances through Stacking and evaluate it
    # the same way as the baselines
    stacking_options = {
        'n_folds': 4,
        'shuffle_data': False,
        'keep_original': True,
        'use_proba': False,
        'random_state': random_state,
    }
    clf = Stacking(classifiers, **stacking_options)

    clf.fit(X_train, y_train)
    y_test_predict = clf.predict(X_test)
    evaluate_print('Stacking | ', y_test, y_test_predict)
    random_state = 42

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # Group of fresh estimators. The baseline runs below build their own
    # instances, so the ones in this list are not fitted in this span.
    classifiers = [
        DecisionTreeClassifier(random_state=random_state),
        LogisticRegression(random_state=random_state),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=random_state),
        GradientBoostingClassifier(random_state=random_state),
    ]

    # Baselines as (report label, estimator class, constructor kwargs).
    # Each estimator is constructed right before it is fitted, then its
    # test-set predictions are passed to evaluate_print.
    seeded = {'random_state': random_state}
    baselines = [
        ('Decision Tree        |', DecisionTreeClassifier, seeded),
        ('Logistic Regression  |', LogisticRegression, seeded),
        ('K Neighbors          |', KNeighborsClassifier, {}),
        ('Gradient Boosting    |', GradientBoostingClassifier, seeded),
    ]
    for label, estimator_cls, params in baselines:
        clf = estimator_cls(**params)
        clf.fit(X_train, y_train)
        evaluate_print(label, y_test, clf.predict(X_test))

    # The random forest is only fitted here; no report is printed for it
    # within this span.
    clf = RandomForestClassifier(random_state=random_state)
    clf.fit(X_train, y_train)
import warnings

warnings.filterwarnings("ignore")

if __name__ == "__main__":
    # Read X and y through load_breast_cancer, then split them into train
    # and test parts (test_size=0.4) with a fixed seed.
    random_state = 42
    X, y = load_breast_cancer(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=random_state)

    # Baselines as (report label, estimator class, constructor kwargs).
    # Each estimator is constructed right before it is fitted, then its
    # test-set predictions are passed to evaluate_print.
    seeded = {'random_state': random_state}
    baselines = [
        ('Decision Tree        |', DecisionTreeClassifier, seeded),
        ('Logistic Regression  |', LogisticRegression, seeded),
        ('K Neighbors          |', KNeighborsClassifier, {}),
        ('Gradient Boosting    |', GradientBoostingClassifier, seeded),
    ]
    for label, estimator_cls, params in baselines:
        clf = estimator_cls(**params)
        clf.fit(X_train, y_train)
        evaluate_print(label, y_test, clf.predict(X_test))

    # The random forest is only fitted here; no report is printed for it
    # within this span.
    clf = RandomForestClassifier(random_state=random_state)
    clf.fit(X_train, y_train)