Beispiel #1
0
def main(date, modelType):
    """
    Runs the training script. Trains the specified model type (or every
    model in ``MODELS``), prints basic accuracy figures for the trained
    model, and hands the fitted classifier to ``FileIO.saveModel``.

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) key of ``MODELS`` naming the model to train,
        or ``ALL`` to train every entry of ``MODELS``

    :return: (None) An unrecognised model type is reported on stdout and
        nothing is loaded or trained.
    """

    # Make sure that the model is a valid choice. Testing the dict itself
    # avoids building a throwaway key list on every call.
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: %s" % (modelType,))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Swap NaN/inf feature values for finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the caller's modelType argument intact
    for currentType in modelsToTrain:

        # Train the desired ML model with its configured hyperparameters
        name, clfType = MODELS[currentType]
        hyperparameters = HYPERPARAMETERS[currentType]
        print("Training the %s" % (name,))

        clf = clfType(**hyperparameters)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls emit the same text on Python 2 and 3
        print("Training Accuracy: %s" % (trainingAccuracy,))
        print("Testing Accuracy: %s" % (testingAccuracy,))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Save the model to disk
        FileIO.saveModel(clf, currentType, date)
Beispiel #2
0
def main(date, modelType):
    """
    Runs the training script. Trains the specified model type (or every
    model in ``MODELS``), prints basic accuracy figures for the trained
    model, and hands the fitted classifier to ``FileIO.saveModel``.

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) key of ``MODELS`` naming the model to train,
        or ``ALL`` to train every entry of ``MODELS``

    :return: (None) An unrecognised model type is reported on stdout and
        nothing is loaded or trained.
    """

    # Make sure that the model is a valid choice. Testing the dict itself
    # avoids building a throwaway key list on every call.
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: %s" % (modelType,))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Swap NaN/inf feature values for finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the caller's modelType argument intact
    for currentType in modelsToTrain:

        # Train the desired ML model with its configured hyperparameters
        name, clfType = MODELS[currentType]
        hyperparameters = HYPERPARAMETERS[currentType]
        print("Training the %s" % (name,))

        clf = clfType(**hyperparameters)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls emit the same text on Python 2 and 3
        print("Training Accuracy: %s" % (trainingAccuracy,))
        print("Testing Accuracy: %s" % (testingAccuracy,))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Save the model to disk
        FileIO.saveModel(clf, currentType, date)
def main(date, modelType, iterations, nJobs=4):
    """
    Determines the optimal hyperparameters for a given machine learning
    model for a set of training data.

    Runs a ``RandomizedSearchCV`` over ``PARAMETERS[modelType]`` for each
    selected model, prints accuracy figures and the selected hyperparameter
    values, and hands the best estimator to ``FileIO.saveModel``.

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) key of ``MODELS`` naming the model to tune,
        or ``ALL`` to tune every entry of ``MODELS``
    :param iterations: (int) number of iterations for hyperparameter searching
    :param nJobs: (int) forwarded to ``RandomizedSearchCV`` as ``n_jobs``;
        defaults to 4, the value that used to be hard-coded

    :return: (None) An unrecognised model type is reported on stdout and
        nothing is loaded or trained.
    """

    # Make sure that the model is a valid choice. Testing the dict itself
    # avoids building a throwaway key list on every call.
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: %s" % (modelType,))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Swap NaN/inf feature values for finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the caller's modelType argument intact
    for currentType in modelsToTrain:

        # Search the hyperparameter space of the desired ML model
        name, clfType = MODELS[currentType]
        print("Training the %s" % (name,))

        baseClassifier = clfType()
        searchSpace = PARAMETERS[currentType]
        clf = RandomizedSearchCV(baseClassifier,
                                 param_distributions=searchSpace,
                                 n_iter=iterations,
                                 n_jobs=nJobs)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls emit the same text on Python 2 and 3
        print("Training Accuracy: %s" % (trainingAccuracy,))
        print("Testing Accuracy: %s" % (testingAccuracy,))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Fetch the winning estimator's parameters once, not once per key
        bestEstimator = clf.best_estimator_
        chosenParams = bestEstimator.get_params()
        print("Hyperparameters:")
        for param in searchSpace:
            print("%s: %s" % (param, chosenParams[param]))
        print(" ")

        # Save the model to disk
        FileIO.saveModel(bestEstimator, currentType, date)
def main(date, modelType, iterations, nJobs=4):
    """
    Determines the optimal hyperparameters for a given machine learning
    model for a set of training data.

    Runs a ``RandomizedSearchCV`` over ``PARAMETERS[modelType]`` for each
    selected model, prints accuracy figures and the selected hyperparameter
    values, and hands the best estimator to ``FileIO.saveModel``.

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) key of ``MODELS`` naming the model to tune,
        or ``ALL`` to tune every entry of ``MODELS``
    :param iterations: (int) number of iterations for hyperparameter searching
    :param nJobs: (int) forwarded to ``RandomizedSearchCV`` as ``n_jobs``;
        defaults to 4, the value that used to be hard-coded

    :return: (None) An unrecognised model type is reported on stdout and
        nothing is loaded or trained.
    """

    # Make sure that the model is a valid choice. Testing the dict itself
    # avoids building a throwaway key list on every call.
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: %s" % (modelType,))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Swap NaN/inf feature values for finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the caller's modelType argument intact
    for currentType in modelsToTrain:

        # Search the hyperparameter space of the desired ML model
        name, clfType = MODELS[currentType]
        print("Training the %s" % (name,))

        baseClassifier = clfType()
        searchSpace = PARAMETERS[currentType]
        clf = RandomizedSearchCV(baseClassifier,
                                 param_distributions=searchSpace,
                                 n_iter=iterations,
                                 n_jobs=nJobs)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls emit the same text on Python 2 and 3
        print("Training Accuracy: %s" % (trainingAccuracy,))
        print("Testing Accuracy: %s" % (testingAccuracy,))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Fetch the winning estimator's parameters once, not once per key
        bestEstimator = clf.best_estimator_
        chosenParams = bestEstimator.get_params()
        print("Hyperparameters:")
        for param in searchSpace:
            print("%s: %s" % (param, chosenParams[param]))
        print(" ")

        # Save the model to disk
        FileIO.saveModel(bestEstimator, currentType, date)