def trimDF(df):
    """Reduce the wide stats frame to the model's feature columns and save it.

    Five column groups (base, three-point, paint, four-factor and rating) are
    sliced out of ``df`` and merged back together one after another on
    ``GAMECODE`` / ``TEAM_ABBREVIATION_x_x``.  The merged frame is written to
    ``DataForModel_<year>.xlsx`` (sheet ``Master``), where ``year`` is read
    from module scope, and is then returned.

    Note: the paint and four-factor groups share some non-key columns
    (``avg_OREB_x``, ``avg_OREB%_x``, ``avg_Fouls_Drawn_x``,
    ``avg_times_blocked_x``), so ``pd.merge`` applies its default ``_x`` /
    ``_y`` suffixes to those columns in the result.
    """
    join_keys = ['GAMECODE', 'TEAM_ABBREVIATION_x_x']

    base_cols = [
        'GAMECODE', 'GAME_DATE_EST_x_x', 'TEAM_ABBREVIATION_x_x',
        'TEAM_ABBREVIATION_y_x', 'HomeIndex_x_x', 'DaysRest_x', 'DaysRest_y',
        'PTS_x_x',
    ]
    three_pt_cols = join_keys + [
        'avg_FG3A_x', 'avg_FG3%_x', 'avg_PCT_FGA_3PT_x', 'avg_PCT_PTS_3PT_x',
        'avg_PCT_AST_3PM_x', 'avg_PCT_UAST_3PM_x',
        'vs_avg_FG3A_y', 'vs_avg_FG3%_y', 'vs_avg_PCT_FGA_3PT_y',
        'vs_avg_PCT_PTS_3PT_y', 'vs_avg_PCT_AST_3PM_y',
        'vs_avg_PCT_UAST_3PM_y',
    ]
    paint_cols = join_keys + [
        'avg_OREB_x', 'avg_OREB%_x', 'avg_PCT_PTS_PAINT_x',
        'avg_PTS_2ND_CHANCE_x', 'avg_PTS_PAINT_x', 'avg_Fouls_Drawn_x',
        'avg_times_blocked_x',
        'vs_avg_DREB_y', 'vs_avg_DREB%_y', 'vs_avg_PCT_PTS_PAINT_y',
        'vs_avg_PTS_2ND_CHANCE_y', 'vs_avg_PTS_PAINT_y', 'vs_avg_Fouls_y',
        'vs_avg_BLK_y',
    ]
    four_factor_cols = join_keys + [
        'avg_AST_x', 'avg_AST_PCT_x', 'avg_AST_TOV_x', 'avg_AST_RATIO_x',
        'avg_EFG%_x', 'avg_TS_x', 'avg_FGA_x', 'avg_FG%_x', 'avg_FTA_x',
        'avg_FT%_x', 'avg_FTA_RATE_x', 'avg_FT_RATE_x', 'avg_Fouls_Drawn_x',
        'avg_Steals_x', 'avg_times_blocked_x', 'avg_TO_x', 'avg_TO%_x',
        'avg_OREB_x', 'avg_OREB%_x', 'avg_PCT_FGA_2PT_x', 'avg_PCT_PTS_FT_x',
        'avg_PCT_PTS_OFF_TOV_x', 'avg_PCT_AST_2PM_x', 'avg_PCT_AST_FGM_x',
        'avg_PTS_OFF_TOV_x',
        'vs_avg_AST_y', 'vs_avg_AST_PCT_y', 'vs_avg_AST_TOV_y',
        'vs_avg_AST_RATIO_y', 'vs_avg_EFG%_y', 'vs_avg_TS_y', 'vs_avg_FGA_y',
        'vs_avg_FG%_y', 'vs_avg_FTA_y', 'vs_avg_FT%_y', 'vs_avg_FTA_RATE_y',
        'vs_avg_FT_RATE_y', 'vs_avg_Fouls_Drawn_y', 'vs_avg_Steals_y',
        'vs_avg_times_blocked_y', 'vs_avg_TO_y', 'vs_avg_TO%_y',
        'vs_avg_OREB_y', 'vs_avg_OREB%_y', 'vs_avg_PCT_FGA_2PT_y',
        'vs_avg_PCT_PTS_FT_y', 'vs_avg_PCT_PTS_OFF_TOV_y',
        'vs_avg_PCT_AST_2PM_y', 'vs_avg_PCT_AST_FGM_y',
        'vs_avg_PTS_OFF_TOV_y', 'avg_DREB_y', 'avg_DREB%_y',
    ]
    rating_cols = join_keys + [
        'AvgORTG_x', 'AvgDRTG_x', 'AvgNET_x', 'HomeORTG_x', 'AwayORTG_x',
        'HomeDRTG_x', 'AwayDRTG_x', 'Location_Avg_ORTG_x',
        'Location_Avg_DRTG_x', 'AvgPace_x', 'Avg_Possessions_x',
        'est_avg_Poss_x', 'AvgPTS_x',
        'vs_AvgORTG_x', 'vs_AvgDRTG_x', 'vs_AvgNET_x', 'vs_HomeORTG_x',
        'vs_AwayORTG_x', 'vs_HomeDRTG_x', 'vs_AwayDRTG_x', 'vs_AvgPace_x',
        'vs_est_avg_Poss_x', 'vs_Avg_Possessions_x',
    ]

    # Slice every group up front so a missing column fails before any merge.
    column_groups = [base_cols, three_pt_cols, paint_cols, four_factor_cols,
                     rating_cols]
    slices = [df[columns] for columns in column_groups]

    # Left-to-right chain of inner merges on the game/team key.
    merged = slices[0]
    for piece in slices[1:]:
        merged = pd.merge(merged, piece, on=join_keys)

    saveToExcel(merged, 'DataForModel_' + year + '.xlsx', 'Master')
    return merged
def getResults(ActualDF, ProjectionsDF, Period):
    """Join actual points onto projections, grade each bet, and save the result.

    Args:
        ActualDF: frame containing at least ``GAMECODE``,
            ``TEAM_ABBREVIATION`` and ``PTS``.
        ProjectionsDF: projections keyed by ``GAMECODE_x`` and
            ``TEAM_ABBREVIATION_x_x``.
        Period: prefix of the output workbook, ``<Period>_Results.xlsx``.

    Returns:
        The merged, graded frame with rows whose ``Correct`` value is
        ``'Push'`` removed.  The same frame is written to the workbook
        (sheet ``Master``).
    """
    # Rename instead of assigning new columns onto a slice of ActualDF; the
    # old pattern triggered pandas' SettingWithCopyWarning.
    actual_pts = ActualDF[['GAMECODE', 'TEAM_ABBREVIATION', 'PTS']].rename(
        columns={
            'GAMECODE': 'GAMECODE_x',
            'TEAM_ABBREVIATION': 'TEAM_ABBREVIATION_x_x',
        })

    graded = pd.merge(ProjectionsDF, actual_pts,
                      on=['GAMECODE_x', 'TEAM_ABBREVIATION_x_x'],
                      how='outer')
    # Drop rows lacking either a projection or an actual score; copy so the
    # column assignments below operate on an independent frame.
    graded = graded.dropna().copy()

    # NOTE(review): shift(1)/shift(-1) combine each row with its neighbour,
    # which presumably relies on the two teams of a game being adjacent rows
    # - confirm against getActualLines.
    graded['ActualSpread'] = graded['PTS'].shift(1) - graded['PTS']
    graded['ActualOU'] = graded['PTS'].shift(-1) + graded['PTS']
    graded['ActualLines'] = graded.apply(getActualLines, axis=1)
    graded['BetType'] = graded.apply(getBetType, axis=1)
    graded['Correct'] = graded.apply(isBetCorrect, axis=1)
    graded = graded.loc[graded['Correct'] != 'Push']
    print('Results Found')

    saveToExcel(graded, Period + '_Results.xlsx', 'Master')
    return graded
def _load_backtest_frame(prefix, year, both):
    """Load ``<prefix><season>.xlsx`` for ``year``, or 2015 and 2016 stacked when ``both``."""
    if not both:
        return getDataSet(prefix + year + '.xlsx')
    seasons = ('2015', '2016')
    return pd.concat([getDataSet(prefix + season + '.xlsx')
                      for season in seasons])


def backtest(year, both):
    """Train the model, project lines, grade them and save a summary.

    Args:
        year: season string used in the workbook names
            (``Historical_Odds_<year>.xlsx``, ``DataForModel_<year>.xlsx``,
            ``AllStats_<year>.xlsx``) and as the summary sheet name.
        both: when truthy, the 2015 and 2016 workbooks are concatenated and
            used instead of the single ``year`` workbooks, and the summary
            goes to sheet ``Both``.

    The summary from ``getResultSummary`` is written to
    ``BackTest_Summary.xlsx``.  Nothing is returned.
    """
    # Only the workbooks that are actually used get loaded.  Previously the
    # per-year files were always read and then thrown away when ``both`` was
    # set, which also failed if that year's file did not exist.
    odds = _load_backtest_frame('Historical_Odds_', year, both)
    df = _load_backtest_frame('DataForModel_', year, both)
    actual = _load_backtest_frame('AllStats_', year, both)

    Regression(df)
    proj = RunModels(df, odds)
    results = getResults(actual, proj, 'Backtest')
    summary = getResultSummary(results)

    saveToExcel(summary, 'BackTest_Summary.xlsx', 'Both' if both else year)
def getAllOdds(year):
    """Back-fill ``Historical_Odds_<year>.xlsx`` with odds for missing dates.

    Every distinct ``GAME_DATE_EST_x_x`` in ``DataForModel_<year>.xlsx`` that
    compares greater than the last ``GAME_DATE_EST_x`` already present in the
    odds workbook is fetched through ``main``, reshaped to the
    ``GAMECODE_x`` / ``TEAM_ABBREVIATION_x_x`` / ``VegasLines`` /
    ``GAME_DATE_EST_x`` layout and appended.

    Args:
        year: season string used in both workbook names.
    """
    model_data = getDataSet('DataForModel_' + year + '.xlsx')
    game_dates = model_data.GAME_DATE_EST_x_x.unique()
    dfall = getDataSet('Historical_Odds_' + year + '.xlsx')
    done = dfall.GAME_DATE_EST_x.unique()
    # An empty odds history used to raise IndexError on done[-1]; in that
    # case every date is treated as outstanding.
    last_done = done[-1] if len(done) else None

    for game_date in game_dates:
        if last_done is not None and not game_date > last_done:
            continue

        # 'YYYY-MM-DD...' -> 'YYYYMMDD', the form passed on to main().
        date_key = game_date[:10].replace("-", "")
        print(date_key)

        day_odds = main(date_key)
        day_odds['AwayTeam'] = day_odds.apply(getTeams, axis=1)
        day_odds['HomeTeam'] = day_odds.apply(getOppTeams, axis=1)
        day_odds['key'] = day_odds['key'].astype(str)
        day_odds['GAMECODE_x'] = day_odds.apply(getGameCodeODDS, axis=1)
        day_odds['VegasLines'] = day_odds.apply(filterOdds, axis=1)
        day_odds['TEAM_ABBREVIATION_x_x'] = day_odds['AwayTeam']
        day_odds = pd.DataFrame({
            'GAMECODE_x': day_odds['GAMECODE_x'],
            'TEAM_ABBREVIATION_x_x': day_odds['TEAM_ABBREVIATION_x_x'],
            'VegasLines': day_odds['VegasLines'],
            'GAME_DATE_EST_x': game_date
        })

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent call.
        dfall = pd.concat([dfall, day_odds])
        # NOTE(review): the original indentation was lost; the save is placed
        # inside the loop so each fetched date is checkpointed - confirm.
        saveToExcel(dfall, 'Historical_Odds_' + year + '.xlsx', 'Master')
def RunModels(df, odds):
    """Project points with the saved ``3PT_Model.sav`` model and grade the lines.

    NOTE(review): another ``RunModels`` definition appears later in this
    source; if both live in the same module the later one replaces this one.

    Args:
        df: model input frame, passed to ``buildModel``.
        odds: frame with ``GAMECODE_x``, ``TEAM_ABBREVIATION_x_x`` and
            ``VegasLines``.

    Returns:
        Frame with ``GAMECODE_x``, ``TEAM_ABBREVIATION_x_x``, ``Predicted``,
        ``CalculatedLines``, ``VegasLines``, ``Difference`` and ``BetGrade``.
        The same frame is saved to ``BackTest_Data.xlsx`` (sheet ``Master``).
    """
    # Only the combined features/labels are used here; the home/away splits
    # returned by buildModel are ignored.
    dfx, dfy, _dfh, _dfa, _yh, _ya = buildModel(df)
    model = LoadModel('3PT_Model.sav')
    predicted = model.predict(dfx.values)

    projections = pd.DataFrame({
        'GAMECODE_x': dfy['GAMECODE'],
        'TEAM_ABBREVIATION_x_x': dfy['TEAM_ABBREVIATION_x_x'],
        'Predicted': predicted
    })
    lines = pd.merge(projections, odds,
                     on=['GAMECODE_x', 'TEAM_ABBREVIATION_x_x'], how='left')

    # NOTE(review): shift(1)/shift(-1) combine each row with its neighbour,
    # presumably assuming the two teams of a game sit on adjacent rows -
    # confirm against getCalcedLines.
    lines['CalculatedSpread'] = lines['Predicted'].shift(1) - lines['Predicted']
    lines['CalculatedOU'] = lines['Predicted'].shift(-1) + lines['Predicted']
    lines['CalculatedLines'] = lines.apply(getCalcedLines, axis=1)

    # Copy the column subset so the assignments below do not raise
    # SettingWithCopyWarning.
    lines = lines[[
        'GAMECODE_x', 'TEAM_ABBREVIATION_x_x', 'Predicted', 'CalculatedLines',
        'VegasLines'
    ]].copy()
    lines['Difference'] = lines['CalculatedLines'] - lines['VegasLines']
    lines['BetGrade'] = lines.apply(gradeBet, axis=1)

    saveToExcel(lines, 'BackTest_Data.xlsx', 'Master')
    return lines
def GetYesterdaysData():
    """Fetch yesterday's game stats and add them to the season workbook.

    Reads the module-level names ``yesterday`` and ``year``.  The new stats
    are saved to ``yesterdaysGames.xlsx`` and read back, then stacked under
    the contents of ``AllStats_<year>.xlsx``.  The season workbook is only
    rewritten when the first nine characters of the first new ``GAMECODE``
    differ from those of the last stored ``GAMECODE``.

    Returns:
        The season stats with yesterday's rows appended.
        NOTE(review): the rows are appended to the returned frame even when
        the save is skipped because they look already stored - confirm that
        callers expect this.
    """
    yesterdaysGames = getGames(yesterday)
    fresh_stats = getADVStats(yesterdaysGames)
    saveToExcel(fresh_stats, 'yesterdaysGames.xlsx', 'Master')

    season_stats = getDataSet('AllStats_' + year + '.xlsx')
    new_stats = getDataSet('yesterdaysGames.xlsx')

    # Plain lists give an unambiguous boolean comparison.  The previous
    # NumPy-array comparison was ambiguous or empty when either frame had no
    # rows.
    last_saved = season_stats.tail(1)['GAMECODE'].str[:9].tolist()
    first_new = new_stats.head(1)['GAMECODE'].str[:9].tolist()

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent call.
    combined = pd.concat([season_stats, new_stats])
    if first_new and last_saved != first_new:
        saveToExcel(combined, 'AllStats_' + year + '.xlsx', 'Master')
    return combined
def RunModels(df, odds):
    """Project points with the saved home and away models and grade the lines.

    NOTE(review): an earlier ``RunModels`` definition appears in this source;
    if both live in the same module this later one is the one in effect.

    Args:
        df: model input frame, passed to ``buildModel``.
        odds: frame with ``GAMECODE_x``, ``TEAM_ABBREVIATION_x_x`` and
            ``VegasLines``.

    Returns:
        Frame with ``GAMECODE_x``, ``TEAM_ABBREVIATION_x_x``, ``Predicted``,
        ``CalculatedLines``, ``VegasLines``, ``Difference`` and ``BetGrade``.
        The same frame is saved to ``BackTest_Data.xlsx`` (sheet ``Master``).
    """
    # Only the home/away splits are used here; the combined features/labels
    # returned by buildModel are ignored.
    _dfx, _dfy, dfh, dfa, yh, ya = buildModel(df)
    home_model = LoadModel('Backtest_Home_Model.sav')
    away_model = LoadModel('Backtest_Away_Model.sav')

    home_pred = home_model.predict(dfh.values)
    away_pred = away_model.predict(dfa.values)

    home_proj = pd.DataFrame({
        'GAMECODE_x': yh['GAMECODE'],
        'TEAM_ABBREVIATION_x_x': yh['TEAM_ABBREVIATION_x_x'],
        'Predicted': home_pred
    })
    away_proj = pd.DataFrame({
        'GAMECODE_x': ya['GAMECODE'],
        'TEAM_ABBREVIATION_x_x': ya['TEAM_ABBREVIATION_x_x'],
        'Predicted': away_pred
    })

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent call.
    projections = pd.concat([home_proj, away_proj])
    lines = pd.merge(projections, odds,
                     on=['GAMECODE_x', 'TEAM_ABBREVIATION_x_x'], how='left')

    # NOTE(review): home rows are stacked above away rows, so the neighbours
    # combined by shift(1)/shift(-1) are not obviously the two teams of the
    # same game - confirm that getCalcedLines accounts for this.
    lines['CalculatedSpread'] = lines['Predicted'].shift(1) - lines['Predicted']
    lines['CalculatedOU'] = lines['Predicted'].shift(-1) + lines['Predicted']
    lines['CalculatedLines'] = lines.apply(getCalcedLines, axis=1)

    # Copy the column subset so the assignments below do not raise
    # SettingWithCopyWarning.
    lines = lines[[
        'GAMECODE_x', 'TEAM_ABBREVIATION_x_x', 'Predicted', 'CalculatedLines',
        'VegasLines'
    ]].copy()
    lines['Difference'] = lines['CalculatedLines'] - lines['VegasLines']
    lines['BetGrade'] = lines.apply(gradeBet, axis=1)

    saveToExcel(lines, 'BackTest_Data.xlsx', 'Master')
    return lines
def GetOdds():
    """Normalise today's odds and stack them under the 2017 odds history.

    Reads the module-level flag ``fetchOdds``; when it is truthy
    ``scrapeOdds()`` is called first.  ``Todays_Odds.xlsx`` is then loaded,
    reduced to ``GAMECODE_x`` / ``TEAM_ABBREVIATION_x_x`` / ``VegasLines`` and
    written back to the same workbook.  The history from
    ``Historical_Odds_2017.xlsx`` with today's rows appended is saved to
    ``Season_Odds.xlsx``.

    Returns:
        The historical odds with today's rows appended.
    """
    season_odds = getDataSet('Historical_Odds_' + '2017' + '.xlsx')

    # NOTE(review): the original indentation was lost; only the scrape call
    # is assumed to be conditional on fetchOdds - confirm.
    if fetchOdds:
        scrapeOdds()

    todays_odds = getDataSet('Todays_Odds.xlsx')
    todays_odds['AwayTeam'] = todays_odds.apply(getTeams, axis=1)
    todays_odds['HomeTeam'] = todays_odds.apply(getOppTeams, axis=1)
    todays_odds['key'] = todays_odds['key'].astype(str)
    todays_odds['GAMECODE_x'] = todays_odds.apply(getGameCodeODDS, axis=1)
    todays_odds['VegasLines'] = todays_odds.apply(filterOdds, axis=1)
    todays_odds['TEAM_ABBREVIATION_x_x'] = todays_odds['AwayTeam']
    todays_odds = todays_odds[['GAMECODE_x', 'TEAM_ABBREVIATION_x_x',
                               'VegasLines']]
    saveToExcel(todays_odds, 'Todays_Odds.xlsx', 'Master')

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent call.
    season_odds = pd.concat([season_odds, todays_odds])
    saveToExcel(season_odds, 'Season_Odds.xlsx', 'Master')
    return season_odds
json_inp = getStat(e, params) df = _api_scrape(json_inp, ndx) if e == 'boxscoresummaryv2': df = df[['GAME_DATE_EST', 'GAMECODE']] df1 = pd.concat([df1, df], axis=1) # print(df1) return (df1) def getADVStats(gameList, endpoints, params, ndx): df1 = pd.DataFrame() for a in gameList: print(a) params['GameID'] = a df = hitEndpoints(endpoints, params, ndx) df1 = pd.concat([df1, df], axis=0) df1.fillna(method='ffill', inplace=True) # print(df1.head()) print('Stats Compiled') # print(df1) return df1 year = "2017" gameList = getAllGames(year) # gameList = gameList[:5] df = getADVStats(gameList, endpoints, params, ndx) saveToExcel(df, "AllStats_" + year + ".xlsx", year) # print(params)
def Regression(df):
    """Fit a linear regression on the model data, report errors and save it.

    The feature and label frames come from ``buildModel(df)``.  Column 2 of
    the label frame is the regression target; columns 0 and 1 are carried
    into the output as ``GAMECODE`` and ``TEAM_ABBREVIATION_x_x``.  A 75/25
    train/test split with ``random_state=0`` is used.

    Side effects:
        * writes per-row test-set errors to ``Model_Results.xlsx``
          (sheet ``Master``);
        * prints the mean absolute and mean squared error of the test set;
        * pickles the fitted regressor to ``3PT_Model.sav``.

    Nothing is returned.
    """
    # Home/away splits returned by buildModel are not used: one model is fit
    # on the combined data.
    features, labels, _dfh, _dfa, _yh, _ya = buildModel(df)
    x = features.values
    y = labels.values

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.25, random_state=0)

    # Keep the full test label rows for the identifier columns, then reduce
    # both label sets to the target column.
    y_compare = y_test
    y_train = y_train[:, 2]
    y_test = y_test[:, 2]

    regressor = LinearRegression()
    regressor.fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    report = pd.DataFrame({
        'GAMECODE': y_compare[:, 0],
        'TEAM_ABBREVIATION_x_x': y_compare[:, 1],
        'Actual': y_test,
        'Predicted': y_pred
    })
    report = report.sort_values(by=['GAMECODE'], ascending=[True])

    # Despite the column names these are per-row absolute and squared errors;
    # the means are taken when printing below.
    report['Mean_Avg_Err'] = (report['Predicted'] - report['Actual']).abs()
    report['Mean_SQ_Err'] = report['Mean_Avg_Err'] * report['Mean_Avg_Err']
    report['Last_10_Avg_Err'] = report['Mean_Avg_Err'].rolling(window=10).mean()
    report['Last_10_SQ_Err'] = report['Mean_SQ_Err'].rolling(window=10).mean()
    saveToExcel(report, 'Model_Results.xlsx', 'Master')

    print('MAE:', report['Mean_Avg_Err'].mean())
    print('MSE:', report['Mean_SQ_Err'].mean())

    ThreePTModelFile = '3PT_Model.sav'
    # Context manager closes the file handle, which was previously left open.
    with open(ThreePTModelFile, 'wb') as model_file:
        pickle.dump(regressor, model_file)