    # build neural net and train it
    net = buildNetwork(trndata.indim, n_hidden, trndata.outdim,
                       outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1,
                              verbose=True, weightdecay=0.01)
    trainer.trainUntilConvergence()
    #trainer.trainEpochs(5)
    print "trained"

    # Return a functor that wraps calling predict
    return NeuralNetworkClassifier(trainer)

if __name__ == "__main__":
    # First obtain our training and testing data
    # Training has 50K samples, Testing 100K
    Xt, Yt, Xv = load_validation_data()

    # Run Neural Network over training data
    classifier = classify(Xt, Yt)

    # Prepare validation data and predict
    tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2)
    tstdata.setField('input', Xv)
    tstdata._convertToOneOfMany()  # one output neuron per class
    predictions = classifier.predict(tstdata)

    # Write predictions to file
    write_test_prediction("out_nn.txt", np.array(predictions))
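# The trndata consumed above is a PyBrain ClassificationDataSet built before
# the fragment shown here. A minimal sketch of how such a training set is
# usually assembled follows; the helper name build_training_set is
# illustrative, not necessarily what this repo defines.
import numpy as np
from pybrain.datasets import ClassificationDataSet

def build_training_set(Xt, Yt):
    # One input row per sample, one integer class label per sample.
    trndata = ClassificationDataSet(Xt.shape[1], 1, nb_classes=2)
    trndata.setField('input', Xt)
    trndata.setField('target', np.asarray(Yt).reshape(-1, 1))
    # Expand the integer labels into one-hot targets so the network
    # gets one softmax output neuron per class.
    trndata._convertToOneOfMany()
    return trndata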
""" gbc = GradientBoostingClassifier(verbose=1) parameters = {'max_depth' : range(3,11),'n_estimators' : [400,500]} classifier = GridSearchCV(gbc, parameters, scoring=metric) classifier.fit(Xtrain, Ytrain) return classifier if __name__ == "__main__": # Let's take our training data and train a decision tree # on a subset. Scikit-learn provides a good module for cross- # validation. Xt, Xv, Yt, Yv = get_split_training_dataset() Classifier = train(Xt, Yt) print "Gradient Boost Classifier" suite(Yv, Classifier.predict(Xv)) # smaller feature set Xtimp, features = fclassify.get_important_data_features(Xt, Yt) Xvimp = fclassify.compress_data_to_important_features(Xv, features) ClassifierImp = train(Xtimp,Yt) print "Gradient Boosts Classiifer, 25 important features" suite(Yv, ClassifierImp.predict(Xvimp)) # save predictions on test data X, Y, validation_data = load_validation_data() predictions = Classifier.predict(validation_data) filename = 'gradient_boost_predictions.txt' write_test_prediction(filename, np.array(predictions))
    def save_test_results(self, Xt, filename="nn_predictions.txt"):
        # Predict labels for Xt and dump them in submission format.
        predictions = self.predict(Xt)
        write_test_prediction(filename, np.array(predictions))
#!/usr/bin/python2
# This is an optimized version of gradient boost.
import sys
import gradient_boost
import feature_selection_trees as fclassify
from util import write_test_prediction, load_validation_data
from metrics import acc
import numpy as np

if __name__ == "__main__":
    # First obtain our training and testing data
    Xt, Yt, Xv = load_validation_data()

    # Train a gradient boost classifier on it.
    gboost = gradient_boost.train(Xt, Yt)
    Yhat = gboost.predict(Xv)

    # Write predictions to file
    write_test_prediction("gboost_optimal_2.txt", Yhat)
print "Accuracy for ensemble classifier:", acc(Yt2, meta_classifier.predict(predictions)) else: # Now, we train each classifier on the training data classifiers = [ adaboost.train(Xt, Yt), extra_randomized_trees.train(Xt, Yt), gradient_boost.train(Xt, Yt), random_forest.train(Xt, Yt), logistic_regression.train(Xt, Yt), ] # Train another classifier on the ensembles output training predictions # for each sample in the training data training_predictions = np.mat([[c.predict(sample)[0] for c in classifiers] for sample in Xt]) meta_classifier = logistic_regression.train(training_predictions, Yt) # Check results on training data print "Accuracy for individual classifiers:", [acc(Yt, c.predict(Xt)) for c in classifiers] predictions = np.mat([c.predict(Xt) for c in classifiers]).transpose() print "Accuracy for ensemble classifier:", acc(Yt, meta_classifier.predict(predictions)) ### TEST DATA ### # Predict on test data using the ensemble and meta classifier predictions = np.mat([c.predict(Xv) for c in classifiers]).transpose() final_predictions = meta_classifier.predict(predictions) # Final accuracy write_test_prediction("ensemble_predictions.txt", np.array(final_predictions))