from util import get_split_training_dataset
from metrics import suite

from sklearn.naive_bayes import GaussianNB

def train(Xtrain, Ytrain):
    """Fit a Gaussian Naive Bayes classifier on the full training set.

    Arguments
    Xtrain -- training feature matrix
    Ytrain -- training labels

    Returns
    classifier -- a GaussianNB instance fitted to Xtrain and Ytrain
    """
    # GaussianNB.fit returns the fitted estimator itself, so the
    # construct-fit-return sequence collapses to a single expression.
    return GaussianNB().fit(Xtrain, Ytrain)

if __name__ == "__main__":
    # Split the provided training data into train/validation portions and
    # evaluate a Gaussian Naive Bayes model on the held-out part.
    # (Original comment said "decision tree" -- this script trains Naive Bayes.)
    Xt, Xv, Yt, Yv = get_split_training_dataset()
    Classifier = train(Xt, Yt)
    # Parenthesized print is valid under both Python 2 and Python 3.
    print("Naive Bayes Classifier")
    suite(Yv, Classifier.predict(Xv))
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.grid_search import GridSearchCV


def train(Xtrain, Ytrain, n=250, d=None):
    """Fit an extremely-randomized-trees ensemble to the training data.

    Arguments
    Xtrain -- training feature matrix
    Ytrain -- training labels
    n -- number of trees in the ensemble (default 250)
    d -- maximum depth of each tree; None lets trees grow fully (default None)

    Returns
    classifier -- the fitted ExtraTreesClassifier
    """
    # Fixed random_state keeps runs reproducible; max_features=36 limits the
    # candidate features considered at each split.
    model = ExtraTreesClassifier(n_estimators=n,
                                 min_samples_split=1,
                                 max_depth=d,
                                 max_features=36,
                                 random_state=0)
    model.fit(Xtrain, Ytrain)
    return model


if __name__ == "__main__":
    # Split the training data and evaluate the extra-trees ensemble on the
    # held-out validation portion.
    Xt, Xv, Yt, Yv = get_split_training_dataset()
    Classifier = train(Xt, Yt)
    # Parenthesized print is valid under both Python 2 and Python 3.
    print("Extra Random Trees Ensemble Classifier")
    suite(Yv, Classifier.predict(Xv))
    C -- regularization parameter

    Returns
    classifier -- a GridSearchCV-wrapped AdaBoost model fitted to Xtrain and Ytrain
    """
    # NOTE(review): the `def` line and the opening of this docstring are not
    # visible in this chunk -- presumably a train(Xtrain, Ytrain, ...) variant
    # for AdaBoost; confirm against the full file. The "C" entry above looks
    # copy-pasted from an SVM docstring and does not match any visible parameter.
    # Base learner: depth-1 decision stumps, the standard AdaBoost weak learner.
    ada = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1))
    # The grid holds a single candidate, so GridSearchCV here mainly adds
    # cross-validated scoring rather than an actual search.
    parameters = {'n_estimators': [150]}

    # Cross-validated search over the (one-point) parameter grid.
    classifier = GridSearchCV(ada, parameters)
    classifier.fit(Xtrain, Ytrain)
    return classifier

if __name__ == "__main__":
    # Split the training data and evaluate the AdaBoost classifier on the
    # held-out validation portion.
    Xt, Xv, Yt, Yv = get_split_training_dataset()
    Classifier = train(Xt, Yt)
    # Parenthesized print is valid under both Python 2 and Python 3.
    print("Adaboost Classifier")
    suite(Yv, Classifier.predict(Xv))

    # Repeat on a reduced feature set to gauge how much signal the top
    # 25 features carry on their own.
    Xtimp, features = fclassify.get_important_data_features(Xt, Yt, max_features=25)
    Xvimp = fclassify.compress_data_to_important_features(Xv, features)
    ClassifierImp = train(Xtimp, Yt)
    # Fixed typos in the user-facing message ("Adaboosts Classiifer").
    print("Adaboost Classifier, 25 important features")
    suite(Yv, ClassifierImp.predict(Xvimp))
# Example #4
def suite():
    """Aggregate the db, metrics, and analysis test suites into one suite."""
    combined = unittest.TestSuite()
    combined.addTests([db.suite(), metrics.suite(), analysis.suite()])
    return combined
# Example #5
    classifier -- A random forest of n estimators, fitted to Xtrain and Ytrain
    """
    # NOTE(review): the `def` line for this function is not visible in this
    # chunk -- it appears to take (Xtrain, Ytrain, n=..., grid=...); confirm
    # against the full file.
    # NOTE(review): `grid == True` should idiomatically be `if grid:`.
    if grid == True:
        forest = RandomForestClassifier(max_depth=None, random_state=0, min_samples_split=1,max_features=38)
        # Candidate ensemble sizes explored by the grid search.
        parameters = {
            'n_estimators': [200,250,300],
        }

        # Cross-validated exhaustive search over the parameter grid.
        classifier = GridSearchCV(forest, parameters)
    else:
        # Fixed-size forest; all other hyperparameters use scikit-learn defaults.
        classifier = RandomForestClassifier(n_estimators=n)

    classifier.fit(Xtrain, Ytrain)
    return classifier

if __name__ == "__main__":
    # Split the training data and evaluate the random-forest classifier on
    # the held-out validation portion.
    Xt, Xv, Yt, Yv = get_split_training_dataset()
    # Parenthesized print is valid under both Python 2 and Python 3.
    print("Random Forest Classifier")
    Classifier = train(Xt, Yt)
    suite(Yv, Classifier.predict(Xv))

    # Repeat on a reduced feature set to gauge how much signal the top
    # features carry on their own.
    Xtimp, features = fclassify.get_important_data_features(Xt, Yt)
    Xvimp = fclassify.compress_data_to_important_features(Xv, features)
    ClassifierImp = train(Xtimp, Yt)
    # Fixed typo in the user-facing message ("Classiifer").
    print("Forest Classifier, ~25 important features")
    suite(Yv, ClassifierImp.predict(Xvimp))