""" Use entirety of provided X, Y to predict Default Arguments Xtrain -- Training data Ytrain -- Training prediction Named Arguments -- Returns classifier -- a tree fitted to Xtrain and Ytrain """ classifier = KNeighborsClassifier(125) classifier.fit(Xtrain, Ytrain) return classifier if __name__ == "__main__": # Let's take our training data and train a decision tree # on a subset. Scikit-learn provides a good module for cross- # validation. if len(sys.argv) < 2: print "Usage: $ python decision-tree.py /path/to/data/file/" else: training = sys.argv[1] X,Y,n,f = load_data(training) Xt, Xv, Yt, Yv = shuffle_split(X,Y) Classifier = train(Xt, Yt) print "KNN Accuracy" suite(Yv, Classifier.predict(Xv))
# NOTE(review): the original file only imported logistic_regression but also
# calls adaboost, extra_randomized_trees, gradient_boost and random_forest
# below -- that raised NameError at runtime. All five base-learner modules
# are imported here.
import adaboost
import extra_randomized_trees
import gradient_boost
import random_forest
import logistic_regression
from util import write_test_prediction, load_validation_data, shuffle_split
from metrics import acc
import numpy as np

# Toggle: hold out part of the labelled data to evaluate the stacked model.
cross_validate = True

if __name__ == "__main__":
    # First obtain our training and testing data
    Xt, Yt, Xv = load_validation_data()

    if cross_validate:
        # for cross-validation: split so the meta-classifier is trained on
        # predictions for samples, not on raw features alone
        Xt1, Xt2, Yt1, Yt2 = shuffle_split(Xt, Yt)

        # Each base classifier is trained independently on the first split.
        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
        ]

        # Train another classifier on the ensemble's output: build one row of
        # base-classifier training predictions for each sample in the
        # training data. (np.mat is legacy -- kept because downstream code
        # may rely on matrix semantics; TODO confirm before migrating to
        # np.asarray.)
        training_predictions = np.mat(
            [[c.predict(sample)[0] for c in classifiers] for sample in Xt1])
        meta_classifier = logistic_regression.train(training_predictions, Yt1)
def classify(Xtrain, Ytrain):
    """
    Use entirety of provided X, Y to fit a decision tree.

    Arguments:
    Xtrain -- Training data
    Ytrain -- Training prediction

    Returns:
    ready_tree -- a tree fitted to Xtrain and Ytrain
    """
    ready_tree = tree.DecisionTreeClassifier()
    ready_tree.fit(Xtrain, Ytrain)
    return ready_tree


if __name__ == "__main__":
    # Let's take our training data and train a decision tree
    # on a subset. Scikit-learn provides a good module for cross-
    # validation.
    if len(sys.argv) < 2:
        print("Usage: $ python decision-tree.py /path/to/data/file/")
    else:
        training = sys.argv[1]
        X, Y, n, f = load_data(training)
        Xt, Xv, Yt, Yv = shuffle_split(X, Y)
        # Named `fitted_tree` (not `tree`) so the sklearn `tree` module used
        # inside classify() is not shadowed at module level.
        fitted_tree = classify(Xt, Yt)
        print("Decision Tree Accuracy:", acc(Yv, fitted_tree.predict(Xv)), "%")
# NOTE(review): only logistic_regression was imported, yet adaboost,
# extra_randomized_trees, gradient_boost and random_forest are all called
# below -- that raised NameError at runtime. Import every base learner.
import adaboost
import extra_randomized_trees
import gradient_boost
import random_forest
import logistic_regression
from util import write_test_prediction, load_validation_data, shuffle_split
from metrics import acc
import numpy as np

# Toggle: hold out part of the labelled data to evaluate the stacked model.
cross_validate = True

if __name__ == "__main__":
    # First obtain our training and testing data
    Xt, Yt, Xv = load_validation_data()

    if cross_validate:
        # for cross-validation: the second split is reserved for scoring the
        # meta-classifier on samples the base classifiers never saw
        Xt1, Xt2, Yt1, Yt2 = shuffle_split(Xt, Yt)

        # Train each base classifier independently on the first split.
        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
        ]

        # Train another classifier on the ensemble's output training
        # predictions: one row per sample, one column per base classifier.
        # (np.mat is legacy; retained in case callers depend on matrix
        # semantics -- TODO confirm before switching to np.asarray.)
        training_predictions = np.mat([[c.predict(sample)[0] for c in classifiers]
                                       for sample in Xt1])
        meta_classifier = logistic_regression.train(training_predictions, Yt1)