コード例 #1
0
#!/usr/bin/python2

# This is an optimized version of gradient boost.

import sys

import gradient_boost
import feature_selection_trees as fclassify

from util import write_test_prediction, load_validation_data
from metrics import acc

import numpy as np

if __name__ == "__main__":
    # load_validation_data() yields three values: the first two are handed
    # to the trainer, the third is what the fitted model predicts on.
    train_X, train_Y, validation_X = load_validation_data()

    # Train a gradient boost classifier on the first two values.
    model = gradient_boost.train(train_X, train_Y)

    # Predict on the third value and write the predictions to the output
    # file (no accuracy is computed in this script).
    write_test_prediction("gboost_optimal_2.txt", model.predict(validation_X))
コード例 #2
0
import numpy as np

# When True, part of the training data is held out and used to report the
# accuracy of the trained classifiers.
cross_validate = True

if __name__ == "__main__":
    # load_validation_data() yields three values; Xv is not used in this part
    # of the script.
    Xt, Yt, Xv = load_validation_data()

    if cross_validate:
        # Split the training data: (Xt1, Yt1) is used for fitting and
        # (Xt2, Yt2) is held out for the accuracy report below.
        Xt1, Xt2, Yt1, Yt2 = shuffle_split(Xt, Yt)

        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
        ]

        # Train another classifier on the ensemble's predictions for the
        # training samples. Each classifier predicts the whole of Xt1 in one
        # call (predict() is also given a full matrix in the accuracy check
        # below) instead of once per sample; the transpose leaves one row per
        # sample and one column per classifier.
        # NOTE(review): these are predictions on the same samples the base
        # classifiers were fitted on, so they are presumably optimistic --
        # consider out-of-fold predictions for the meta-classifier.
        training_predictions = np.mat(
            [c.predict(Xt1) for c in classifiers]).transpose()

        meta_classifier = logistic_regression.train(training_predictions, Yt1)

        # Check results on the held-out split (Xt2, Yt2), not on the data the
        # classifiers were trained on. The single-argument print emits the
        # same text as the old print statement and also parses on Python 3.
        print("Accuracy for individual classifiers: %s" % (
            [acc(Yt2, c.predict(Xt2)) for c in classifiers],))
コード例 #3
0
import numpy as np

# When True, part of the training data is held out and used to report the
# accuracy of the individual classifiers and of the stacked ensemble.
cross_validate = True

if __name__ == "__main__":
    # load_validation_data() yields three values; Xv is not used in this part
    # of the script.
    Xt, Yt, Xv = load_validation_data()

    if cross_validate:
        # Split the training data: (Xt1, Yt1) is used for fitting and
        # (Xt2, Yt2) is held out for the accuracy report below.
        Xt1, Xt2, Yt1, Yt2 = shuffle_split(Xt, Yt)

        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
        ]

        # Train another classifier on the ensemble's predictions for the
        # training samples. Each classifier predicts the whole of Xt1 in one
        # call, exactly as the held-out matrix is built further down, instead
        # of once per sample; the transpose leaves one row per sample and one
        # column per classifier.
        # NOTE(review): these are predictions on the same samples the base
        # classifiers were fitted on, so they are presumably optimistic --
        # consider out-of-fold predictions for the meta-classifier.
        training_predictions = np.mat(
            [c.predict(Xt1) for c in classifiers]).transpose()

        meta_classifier = logistic_regression.train(training_predictions, Yt1)

        # Check results on the held-out split (Xt2, Yt2), not on the data the
        # classifiers were trained on. Each classifier predicts Xt2 once and
        # the result is reused for both reports.
        holdout_predictions = [c.predict(Xt2) for c in classifiers]

        # The single-argument print emits the same text as the old print
        # statement and also parses on Python 3.
        print("Accuracy for individual classifiers: %s" % (
            [acc(Yt2, p) for p in holdout_predictions],))
        predictions = np.mat(holdout_predictions).transpose()
        print("Accuracy for ensemble classifier: %s" % (
            acc(Yt2, meta_classifier.predict(predictions)),))