    # build neural net and train it
    net = buildNetwork(trndata.indim, n_hidden, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(net, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)

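    # trainUntilConvergence() holds out part of trndata as a validation
    # set and stops training once the validation error stops improving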
    trainer.trainUntilConvergence()
    #trainer.trainEpochs(5)

    print "trained"

    # Return a functor that wraps calling predict
    return NeuralNetworkClassifier(trainer)

if __name__ == "__main__":
    # First obtain our training and testing data
    # Training has 50K samples, Testing 100K
    Xt, Yt, Xv = load_validation_data()

    # Run Neural Network over training data
    classifier = classify(Xt, Yt)

    # Prepare validation data and predict
    tstdata = ClassificationDataSet(Xv.shape[1], 1, nb_classes=2)
    tstdata.setField('input', Xv)
    # _convertToOneOfMany() rewrites the 'target' field, so give the
    # unlabeled validation data dummy zero targets first
    tstdata.setField('target', np.zeros((Xv.shape[0], 1)))
    tstdata._convertToOneOfMany()  # one output neuron per class

    predictions = classifier.predict(tstdata)

    # Write prediction to file
    write_test_prediction("out_nn.txt", np.array(majority))
Example #3
        # Train each classifier on the training data
        classifiers = [
            adaboost.train(Xt, Yt),
            extra_randomized_trees.train(Xt, Yt),
            gradient_boost.train(Xt, Yt),
            random_forest.train(Xt, Yt),
            logistic_regression.train(Xt, Yt),
        ]
        # Train another classifier on the ensembles output training predictions
        # for each sample in the training data
        training_predictions = np.mat(
            [[c.predict(sample)[0] for c in classifiers] for sample in Xt])

        meta_classifier = logistic_regression.train(training_predictions, Yt)

        # Check results on training data
        print "Accuracy for individual classifiers:", [
            acc(Yt, c.predict(Xt)) for c in classifiers
        ]
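        # One row per training sample, one column per base model
        # (np.mat gives classifiers x samples; transpose flips it)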
        predictions = np.mat([c.predict(Xt) for c in classifiers]).transpose()
        print "Accuracy for ensemble classifier:", acc(
            Yt, meta_classifier.predict(predictions))

    ### TEST DATA ###

    # Predict on test data using the ensemble and meta classifier
    predictions = np.mat([c.predict(Xv) for c in classifiers]).transpose()
    final_predictions = meta_classifier.predict(predictions)

    # Write the final predictions to file
    write_test_prediction("ensemble_predictions.txt",
                          np.array(final_predictions))
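
A caveat on the snippet above: the meta-classifier is trained on the base models' in-sample predictions, so the printed ensemble accuracy on Xt is optimistic. A sketch of a leakage-free variant using out-of-fold predictions (cross_val_predict and the helper name here are illustrative, not part of the original code):

# Sketch: build the meta-classifier's features from out-of-fold
# predictions, so no base model predicts on data it was fitted on.
import numpy as np
from sklearn.cross_validation import cross_val_predict  # sklearn >= 0.16
from sklearn.linear_model import LogisticRegression

def stacked_features(base_models, X, y):
    # One column of out-of-fold predictions per base model
    return np.column_stack([cross_val_predict(m, X, y, cv=5)
                            for m in base_models])

# meta = LogisticRegression().fit(stacked_features(models, Xt, Yt), Yt)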
Example #4
    """
    gbc = GradientBoostingClassifier(verbose=1)
    parameters = {'max_depth': range(3, 11), 'n_estimators': [400, 500]}

    classifier = GridSearchCV(gbc, parameters, scoring=metric)
    classifier.fit(Xtrain, Ytrain)
    return classifier

if __name__ == "__main__":
    # Let's take our training data and train a decision tree
    # on a subset. Scikit-learn provides a good module for cross-
    # validation.
    Xt, Xv, Yt, Yv = get_split_training_dataset()
    Classifier = train(Xt, Yt)
    print "Gradient Boost Classifier"
    suite(Yv, Classifier.predict(Xv))

    # smaller feature set
    Xtimp, features = fclassify.get_important_data_features(Xt, Yt)
    Xvimp = fclassify.compress_data_to_important_features(Xv, features)
    ClassifierImp = train(Xtimp, Yt)
    print "Gradient Boost Classifier, 25 important features"
    suite(Yv, ClassifierImp.predict(Xvimp))

    # save predictions on test data

    X, Y, validation_data = load_validation_data()
    predictions = Classifier.predict(validation_data)
    filename = 'gradient_boost_predictions.txt'
    write_test_prediction(filename, np.array(predictions))
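
With refit=True (the default), GridSearchCV refits the best-scoring parameter combination on the full training set, which is why Classifier.predict() above works directly. The chosen settings can be inspected afterwards (attribute names per scikit-learn's GridSearchCV API):

# Inspect what the grid search selected
print Classifier.best_params_  # e.g. {'max_depth': 5, 'n_estimators': 400}
print Classifier.best_score_   # mean cross-validated score of that setting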
Example #5
    def save_test_results(self, Xt, filename="nn_predictions.txt"):
        predictions = self.predict(Xt)
        write_test_prediction(filename, np.array(predictions))
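
A minimal usage sketch, assuming a trained classifier object exposing this method (names follow the earlier snippets):

# clf = classify(Xt, Yt)  # returns a NeuralNetworkClassifier
# clf.save_test_results(Xv, filename="nn_predictions.txt")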
Example #6
#!/usr/bin/python2

# This is an optimized version of gradient boost.

import sys

import gradient_boost
import feature_selection_trees as fclassify

from util import write_test_prediction, load_validation_data
from metrics import acc

import numpy as np

if __name__ == "__main__":
    # First obtain our training and testing data
    Xt, Yt, Xv = load_validation_data()

    # Train a gradient boost classifier on it.
    gboost = gradient_boost.train(Xt, Yt)
    Yhat = gboost.predict(Xv)

    # Final accuracy
    write_test_prediction("gboost_optimal_2.txt", Yhat)