def main(date, modelType):
    """
    Runs the training script.

    Trains the specified model type, runs basic accuracy tests on the trained
    model, and saves the model to a predefined location (specified in the
    Constants file).

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) type of machine learning model to train; pass
                      ALL to train every model registered in MODELS
    :return: (None) -- also returns early, after printing a message, when
             modelType is neither ALL nor a key of MODELS
    """
    # Make sure that the model is a valid choice
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: {}".format(modelType))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Replace NaN/inf entries with finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the modelType argument intact
    for currentType in modelsToTrain:
        # Train the desired ML model
        name, clfType = MODELS[currentType]
        hyperparameters = HYPERPARAMETERS[currentType]

        print("Training the {}".format(name))
        clf = clfType(**hyperparameters)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls behave the same on Python 2 and 3
        print("Training Accuracy: {}".format(trainingAccuracy))
        print("Testing Accuracy: {}".format(testingAccuracy))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Save the model to disk
        FileIO.saveModel(clf, currentType, date)
def main(date, modelType):
    """
    Runs the training script.

    Trains the specified model type, runs basic accuracy tests on the trained
    model, and saves the model to a predefined location (specified in the
    Constants file).

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) type of machine learning model to train; pass
                      ALL to train every model registered in MODELS
    :return: (None) -- also returns early, after printing a message, when
             modelType is neither ALL nor a key of MODELS
    """
    # Make sure that the model is a valid choice
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: {}".format(modelType))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Replace NaN/inf entries with finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the modelType argument intact
    for currentType in modelsToTrain:
        # Train the desired ML model
        name, clfType = MODELS[currentType]
        hyperparameters = HYPERPARAMETERS[currentType]

        print("Training the {}".format(name))
        clf = clfType(**hyperparameters)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls behave the same on Python 2 and 3
        print("Training Accuracy: {}".format(trainingAccuracy))
        print("Testing Accuracy: {}".format(testingAccuracy))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Save the model to disk
        FileIO.saveModel(clf, currentType, date)
def main(date, modelType, iterations):
    """
    Determines the optimal hyperparameters for a given machine learning model
    for a set of training data.

    For each selected model a RandomizedSearchCV is fitted on the training
    data, basic accuracy figures and the chosen hyperparameters are printed,
    and the best estimator found is saved through FileIO.saveModel.

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) type of machine learning model to train; pass
                      ALL to search every model registered in MODELS
    :param iterations: (int) number of iterations for hyperparameter searching
    :return: (None) -- also returns early, after printing a message, when
             modelType is neither ALL nor a key of MODELS
    """
    # Make sure that the model is a valid choice
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: {}".format(modelType))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Replace NaN/inf entries with finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the modelType argument intact
    for currentType in modelsToTrain:
        # Train the desired ML model
        name, clfType = MODELS[currentType]
        searchSpace = PARAMETERS[currentType]

        print("Training the {}".format(name))
        baseClassifier = clfType()
        clf = RandomizedSearchCV(baseClassifier,
                                 param_distributions=searchSpace,
                                 n_iter=iterations,
                                 n_jobs=4)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls behave the same on Python 2 and 3
        print("Training Accuracy: {}".format(trainingAccuracy))
        print("Testing Accuracy: {}".format(testingAccuracy))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Report the value chosen for every searched hyperparameter; the
        # parameter dict is fetched once rather than once per parameter
        bestEstimator = clf.best_estimator_
        chosenParams = bestEstimator.get_params()
        print("Hyperparameters:")
        for param in searchSpace:
            print("{}: {}".format(param, chosenParams[param]))
        print(" ")

        # Save the model to disk
        FileIO.saveModel(bestEstimator, currentType, date)
def main(date, modelType, iterations):
    """
    Determines the optimal hyperparameters for a given machine learning model
    for a set of training data.

    For each selected model a RandomizedSearchCV is fitted on the training
    data, basic accuracy figures and the chosen hyperparameters are printed,
    and the best estimator found is saved through FileIO.saveModel.

    :param date: Date the training and testing data was collected (YYYY_MMDD)
    :param modelType: (string) type of machine learning model to train; pass
                      ALL to search every model registered in MODELS
    :param iterations: (int) number of iterations for hyperparameter searching
    :return: (None) -- also returns early, after printing a message, when
             modelType is neither ALL nor a key of MODELS
    """
    # Make sure that the model is a valid choice
    if modelType != ALL and modelType not in MODELS:
        print("Invalid model type: {}".format(modelType))
        return

    # Allow for training more than one model at a time
    modelsToTrain = list(MODELS) if modelType == ALL else [modelType]

    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    testX, testY = FileIO.loadTestingData(date)

    # Replace NaN/inf entries with finite numbers before fitting
    trainX = np.nan_to_num(trainX)
    testX = np.nan_to_num(testX)

    # A separate loop name keeps the modelType argument intact
    for currentType in modelsToTrain:
        # Train the desired ML model
        name, clfType = MODELS[currentType]
        searchSpace = PARAMETERS[currentType]

        print("Training the {}".format(name))
        baseClassifier = clfType()
        clf = RandomizedSearchCV(baseClassifier,
                                 param_distributions=searchSpace,
                                 n_iter=iterations,
                                 n_jobs=4)
        clf.fit(trainX, trainY)

        # Perform some very basic accuracy testing
        trainResult = clf.predict(trainX)
        testResult = clf.predict(testX)

        trainingAccuracy = accuracy_score(trainY, trainResult)
        testingAccuracy = accuracy_score(testY, testResult)
        confusionMatrix = confusion_matrix(testY, testResult)

        # Single-argument print calls behave the same on Python 2 and 3
        print("Training Accuracy: {}".format(trainingAccuracy))
        print("Testing Accuracy: {}".format(testingAccuracy))
        print("Confusion Matrix:")
        print(confusionMatrix)
        print(" ")

        # Report the value chosen for every searched hyperparameter; the
        # parameter dict is fetched once rather than once per parameter
        bestEstimator = clf.best_estimator_
        chosenParams = bestEstimator.get_params()
        print("Hyperparameters:")
        for param in searchSpace:
            print("{}: {}".format(param, chosenParams[param]))
        print(" ")

        # Save the model to disk
        FileIO.saveModel(bestEstimator, currentType, date)