Code example #1
0
    """Fit a k-nearest-neighbors classifier to the entire provided X, Y.

    NOTE(review): the enclosing `def` line is missing from this snippet;
    the caller below invokes `train(Xt, Yt)`, so this is presumably
    `def train(Xtrain, Ytrain):` -- confirm against the original file.

    Arguments
    Xtrain -- Training data (feature matrix)
    Ytrain -- Training labels

    Returns
    classifier -- a KNeighborsClassifier fitted to Xtrain and Ytrain
    (the original docstring said "a tree", but the code below builds a
    KNN classifier, not a decision tree)
    """
    # 125 is the first positional argument, i.e. n_neighbors.  No
    # rationale for this magic value is visible here -- TODO confirm.
    classifier = KNeighborsClassifier(125)
    classifier.fit(Xtrain, Ytrain)
    return classifier

if __name__ == "__main__":
    # Train a classifier on one split of the data and report accuracy on
    # the held-out split.  (The original comment said "decision tree",
    # but the train() above fits a KNN model.)
    # NOTE(review): Python 2 print statements -- this snippet predates
    # Python 3; do not run under py3 without converting the prints.

    if len(sys.argv) < 2:
        # No data path supplied on the command line.
        print "Usage: $ python decision-tree.py /path/to/data/file/"
    else:
        training = sys.argv[1]
        # load_data / shuffle_split / suite are project helpers not shown
        # in this snippet -- presumably they load the data file, make a
        # train/validation split, and print metrics; confirm in the
        # project's util module.
        X,Y,n,f = load_data(training)
        Xt, Xv, Yt, Yv = shuffle_split(X,Y)
        Classifier = train(Xt, Yt)
        print "KNN Accuracy"
        suite(Yv, Classifier.predict(Xv))
Code example #2
0
import logistic_regression

from util import write_test_prediction, load_validation_data, shuffle_split
from metrics import acc

import numpy as np

cross_validate = True

if __name__ == "__main__":
    # Build an ensemble of base classifiers, then fit a logistic-
    # regression meta-classifier on their per-sample predictions
    # (a stacking scheme).
    # First obtain our training and testing data
    Xt, Yt, Xv = load_validation_data()

    if cross_validate:
        # Split the training data in two for cross-validation; the second
        # half (Xt2/Yt2) is not used in the lines visible here.
        Xt1, Xt2, Yt1, Yt2 = shuffle_split(Xt, Yt)

        # NOTE(review): adaboost, extra_randomized_trees, gradient_boost
        # and random_forest are not imported in the visible header --
        # presumably imported in lines trimmed from this snippet; verify.
        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
        ]

        # One row per training sample, one column per base classifier:
        # the [0] takes the single prediction out of the returned array.
        # NOTE(review): predict() is called on a single sample; sklearn
        # estimators expect a 2-D array, so either `sample` is already
        # shaped (1, n_features) or the project wrappers handle it --
        # confirm before reusing.
        training_predictions = np.mat(
            [[c.predict(sample)[0] for c in classifiers] for sample in Xt1])

        meta_classifier = logistic_regression.train(training_predictions, Yt1)
Code example #3
0

def classify(Xtrain, Ytrain):
    """Fit a decision tree on the entirety of the provided X, Y.

    Arguments
    Xtrain -- Training data
    Ytrain -- Training prediction

    Returns
    model -- a decision tree fitted to Xtrain and Ytrain
    """
    model = tree.DecisionTreeClassifier()
    model.fit(Xtrain, Ytrain)
    return model


if __name__ == "__main__":
    # Let's take our training data and train a decision tree
    # on a subset. Scikit-learn provides a good module for cross-
    # validation.

    if len(sys.argv) < 2:
        print "Usage: $ python decision-tree.py /path/to/data/file/"
    else:
        training = sys.argv[1]
        X, Y, n, f = load_data(training)
        Xt, Xv, Yt, Yv = shuffle_split(X, Y)
        tree = classify(Xt, Yt)
        print "Decision Tree Accuracy:", acc(Yv, tree.predict(Xv)), "%"
Code example #4
0
import logistic_regression

from util import write_test_prediction, load_validation_data, shuffle_split
from metrics import acc

import numpy as np

cross_validate = True

if __name__ == "__main__":
    # Stacking ensemble: train several base classifiers, then fit a
    # logistic-regression meta-classifier on their predictions.
    # NOTE(review): this is a near-duplicate of the earlier stacking
    # snippet (example #2); only formatting differs.
    # First obtain our training and testing data
    Xt, Yt, Xv = load_validation_data()

    if cross_validate:
        # Split the training data in two for cross-validation; the second
        # half (Xt2/Yt2) is unused in the lines visible here.
        Xt1, Xt2, Yt1, Yt2 = shuffle_split(Xt, Yt)

        # NOTE(review): adaboost, extra_randomized_trees, gradient_boost
        # and random_forest are not imported in the visible header --
        # presumably imported in lines trimmed from this snippet; verify.
        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
            ]

        # One row per training sample, one column per base classifier;
        # [0] unwraps the single prediction from the returned array.
        # NOTE(review): predict() on a single sample assumes the sample
        # is 2-D or the project wrappers reshape it -- confirm.
        training_predictions = np.mat([[c.predict(sample)[0] for c in classifiers] for sample in Xt1])

        meta_classifier = logistic_regression.train(training_predictions, Yt1)