def saveModel(model, filename):
    """Pickle ``model`` into the file named ``filename``.

    ``setCurrentWorkingDirectory('SavedModels')`` is called first, so a
    relative ``filename`` is presumably resolved inside that folder
    (helper is defined elsewhere -- confirm).

    Args:
        model: Any picklable object (typically a fitted estimator).
        filename: Name of the output file; opened in binary write mode.
    """
    # Change this folder name to save the model somewhere else.
    setCurrentWorkingDirectory('SavedModels')

    with open(filename, 'wb') as modelFile:
        pickle.dump(model, modelFile)
def makeInterpretPredictions(currentDate, season, startOfSeason):
    """Print a header for ``currentDate`` and then interpret that day's predictions.

    Args:
        currentDate: Date string; concatenated directly into the header line.
        season: Passed through unchanged to ``predictDailyGames``.
        startOfSeason: Passed through unchanged to ``predictDailyGames``.
    """
    # Switch to the saved-models folder before predicting; presumably
    # predictDailyGames loads a model from there (confirm).
    setCurrentWorkingDirectory('SavedModels')

    header = ''.join(('Predictions for ', currentDate, ':'))
    print(header)

    interpretPredictions(predictDailyGames(currentDate, season, startOfSeason))
def createModel(startYear=None, startMonth=None, startDay=None, endYear=None, endMonth=None, endDay=None, filename='logRegModel.pkl'):
    """Train a logistic-regression model from 'games.csv' and save it.

    The date-range arguments are currently unused because the training data
    is read from a CSV file; they only matter if the commented-out lines
    below are re-enabled to fetch the games directly.

    Args:
        startYear, startMonth, startDay: Start of the game range (unused
            while reading from CSV).
        endYear, endMonth, endDay: End of the game range (unused while
            reading from CSV).
        filename: Name of the pickle file handed to ``saveModel``. Previously
            ``saveModel`` was called without a filename, which raises
            ``TypeError`` because that argument is required.
    """
    # allGames = getTrainingSet(startYear, startMonth, startDay, endYear, endMonth, endDay)  # Unnecessary if using data from CSV file
    # allGamesDataframe = createDataFrame(allGames)  # Unnecessary if using data from CSV file

    setCurrentWorkingDirectory('Data')
    # Should be commented out if needing to obtain data on different range of games
    allGamesDataframe = pd.read_csv('games.csv')

    logRegModel = performLogReg(allGamesDataframe)

    # saveModel requires both the model and a target filename.
    saveModel(logRegModel, filename)
def getTrainingSetCSV(startYear, startMonth, startDay, endYear, endMonth, endDay, season, startDateOfSeason, filename='gamesWithInfo.csv'):
    """Collect the games in a date range and write them to a CSV file.

    Args:
        startYear, startMonth, startDay: First date of the range.
        endYear, endMonth, endDay: Last date of the range.
        season: Passed through to ``getTrainingSet``.
        startDateOfSeason: Passed through to ``getTrainingSet``.
        filename: Name of the CSV file to write (after switching to 'Data').
    """
    # Gets date, teams, and z-score difs for every game within range
    gamesInRange = getTrainingSet(
        startYear, startMonth, startDay,
        endYear, endMonth, endDay,
        season, startDateOfSeason,
    )
    gamesFrame = createDataFrame(gamesInRange)

    setCurrentWorkingDirectory('Data')
    gamesFrame.to_csv(filename)
def createModel(startYear=None, startMonth=None, startDay=None, endYear=None, endMonth=None, endDay=None, season='2018-19', startOfSeason='10/16/2018', filename='rfModel.pkl'):
    """Train a model with ``performRF`` on the combined games CSV and save it.

    Training data is read from 'COMBINEDgamesWithInfo2016-19.csv', so the
    date-range, ``season`` and ``startOfSeason`` arguments are not used here.
    To train on a different range of games, replace the CSV read with
    ``createDataFrame(getTrainingSet(startYear, startMonth, startDay,
    endYear, endMonth, endDay, season, startOfSeason))``.

    Args:
        startYear, startMonth, startDay: Start of the game range (unused).
        endYear, endMonth, endDay: End of the game range (unused).
        season: Season label (unused while reading from CSV).
        startOfSeason: Season start date (unused while reading from CSV).
        filename: Name of the pickle file handed to ``saveModel``.
    """
    setCurrentWorkingDirectory('Data')
    trainingFrame = pd.read_csv('COMBINEDgamesWithInfo2016-19.csv')

    saveModel(performRF(trainingFrame), filename)
def getPredictionsCSV(gameDataFilename, outputFilename, modelFilename='model12.pkl'):
    """Predict every game in a CSV, save the probabilities, and print accuracy.

    Reads ``gameDataFilename`` (after switching to 'Data'), loads a pickled
    model (after switching to 'SavedModels'), predicts each row, writes one
    row per game to ``outputFilename`` (after switching back to 'Data'), and
    prints the fraction of games whose prediction matched the recorded value.

    Args:
        gameDataFilename: CSV that must contain the column ranges
            'Home'..'Date' and 'W_PCT'..'TS_PCT'.
        outputFilename: CSV file to write with columns 'Date', 'Home',
            'Away', 'Home Team Win Probability'.
        modelFilename: Pickle file holding the model; must expose
            ``predict`` and ``predict_proba``. Defaults to the previously
            hard-coded 'model12.pkl'.
    """
    setCurrentWorkingDirectory('Data')
    gamesWithZScoreDifs = pd.read_csv(gameDataFilename)

    # Slices dataframe to only include home through date
    withoutNums = gamesWithZScoreDifs.loc[:, 'Home':'Date']
    print(withoutNums)
    # Slices dataframe to only include statistical differences
    justZScoreDifs = gamesWithZScoreDifs.loc[:, 'W_PCT':'TS_PCT']

    setCurrentWorkingDirectory('SavedModels')
    # SECURITY: pickle.load can execute arbitrary code; only load model
    # files that come from a trusted source.
    with open(modelFilename, 'rb') as modelFile:
        pickleModel = pickle.load(modelFile)

    # Predicted outcome per game
    predictions = pickleModel.predict(justZScoreDifs)
    # Per-game class probabilities; index 1 is used as the home-win probability
    probPredictions = pickleModel.predict_proba(justZScoreDifs)

    numCorrect = 0
    allGames = []

    for i, gameProbabilities in enumerate(probPredictions):
        winProbability = gameProbabilities[1]
        homeTeam = withoutNums.iloc[i, 0]
        awayTeam = withoutNums.iloc[i, 1]
        date = withoutNums.iloc[i, 10]
        allGames.append([date, homeTeam, awayTeam, winProbability])

        # Column 9 of the slice is compared against the model's prediction;
        # presumably it holds the actual game result -- confirm against the CSV.
        if withoutNums.iloc[i, 9] == predictions[i]:
            numCorrect += 1

    # Build the dataframe and write the CSV once, after all games have been
    # collected, instead of rebuilding and rewriting it on every iteration.
    predictionsDF = pd.DataFrame(
        allGames,
        columns=['Date', 'Home', 'Away', 'Home Team Win Probability'],
    )
    setCurrentWorkingDirectory('Data')
    predictionsDF.to_csv(outputFilename)  # Saves game info with predictions in data folder as csv file

    print('Accuracy:')
    if allGames:
        # Prints accuracy of model in predicting games for specified range
        print(numCorrect / len(allGames))
    else:
        # Avoid dividing by zero when the input file contains no games.
        print('N/A (no games to evaluate)')
def makeInterpretPredictions(currentDate, season, startOfSeason):
    """Predict the games for ``currentDate`` and return the interpreted result.

    Args:
        currentDate: Passed through unchanged to ``predictDailyGames``.
        season: Passed through unchanged to ``predictDailyGames``.
        startOfSeason: Passed through unchanged to ``predictDailyGames``.

    Returns:
        Whatever ``interpretPredictions`` returns for the day's predictions.
    """
    # Switch to the saved-models folder before predicting; presumably
    # predictDailyGames loads a model from there (confirm).
    setCurrentWorkingDirectory('SavedModels')

    dailyPredictions = predictDailyGames(currentDate, season, startOfSeason)
    interpreted = interpretPredictions(dailyPredictions)
    return interpreted