예제 #1
0
def trimDF(df):
    """Cut ``df`` down to the model feature columns and save the result.

    Five column groups (base game info, three-point stats, paint stats,
    general shooting/ball-handling stats and rating/pace stats) are sliced
    out of ``df`` and merged back together on ``GAMECODE`` and
    ``TEAM_ABBREVIATION_x_x``. The merged frame is written through
    ``saveToExcel`` to ``DataForModel_<year>.xlsx`` and returned.

    ``year`` is read from module scope; it is not a parameter.

    Note: the paint and general groups share several non-key columns
    (e.g. ``avg_OREB_x``), so ``pd.merge`` applies its default ``_x``/``_y``
    suffixes to those columns in the output.
    """
    merge_keys = ['GAMECODE', 'TEAM_ABBREVIATION_x_x']

    base_cols = [
        'GAMECODE', 'GAME_DATE_EST_x_x', 'TEAM_ABBREVIATION_x_x',
        'TEAM_ABBREVIATION_y_x', 'HomeIndex_x_x', 'DaysRest_x', 'DaysRest_y',
        'PTS_x_x',
    ]

    three_pt_cols = merge_keys + [
        'avg_FG3A_x', 'avg_FG3%_x', 'avg_PCT_FGA_3PT_x', 'avg_PCT_PTS_3PT_x',
        'avg_PCT_AST_3PM_x', 'avg_PCT_UAST_3PM_x',
        'vs_avg_FG3A_y', 'vs_avg_FG3%_y', 'vs_avg_PCT_FGA_3PT_y',
        'vs_avg_PCT_PTS_3PT_y', 'vs_avg_PCT_AST_3PM_y',
        'vs_avg_PCT_UAST_3PM_y',
    ]

    paint_cols = merge_keys + [
        'avg_OREB_x', 'avg_OREB%_x', 'avg_PCT_PTS_PAINT_x',
        'avg_PTS_2ND_CHANCE_x', 'avg_PTS_PAINT_x', 'avg_Fouls_Drawn_x',
        'avg_times_blocked_x',
        'vs_avg_DREB_y', 'vs_avg_DREB%_y', 'vs_avg_PCT_PTS_PAINT_y',
        'vs_avg_PTS_2ND_CHANCE_y', 'vs_avg_PTS_PAINT_y', 'vs_avg_Fouls_y',
        'vs_avg_BLK_y',
    ]

    general_cols = merge_keys + [
        'avg_AST_x', 'avg_AST_PCT_x', 'avg_AST_TOV_x', 'avg_AST_RATIO_x',
        'avg_EFG%_x', 'avg_TS_x', 'avg_FGA_x', 'avg_FG%_x', 'avg_FTA_x',
        'avg_FT%_x', 'avg_FTA_RATE_x', 'avg_FT_RATE_x', 'avg_Fouls_Drawn_x',
        'avg_Steals_x', 'avg_times_blocked_x', 'avg_TO_x', 'avg_TO%_x',
        'avg_OREB_x', 'avg_OREB%_x', 'avg_PCT_FGA_2PT_x', 'avg_PCT_PTS_FT_x',
        'avg_PCT_PTS_OFF_TOV_x', 'avg_PCT_AST_2PM_x', 'avg_PCT_AST_FGM_x',
        'avg_PTS_OFF_TOV_x',
        'vs_avg_AST_y', 'vs_avg_AST_PCT_y', 'vs_avg_AST_TOV_y',
        'vs_avg_AST_RATIO_y', 'vs_avg_EFG%_y', 'vs_avg_TS_y', 'vs_avg_FGA_y',
        'vs_avg_FG%_y', 'vs_avg_FTA_y', 'vs_avg_FT%_y', 'vs_avg_FTA_RATE_y',
        'vs_avg_FT_RATE_y', 'vs_avg_Fouls_Drawn_y', 'vs_avg_Steals_y',
        'vs_avg_times_blocked_y', 'vs_avg_TO_y', 'vs_avg_TO%_y',
        'vs_avg_OREB_y', 'vs_avg_OREB%_y', 'vs_avg_PCT_FGA_2PT_y',
        'vs_avg_PCT_PTS_FT_y', 'vs_avg_PCT_PTS_OFF_TOV_y',
        'vs_avg_PCT_AST_2PM_y', 'vs_avg_PCT_AST_FGM_y',
        'vs_avg_PTS_OFF_TOV_y', 'avg_DREB_y', 'avg_DREB%_y',
    ]

    rating_cols = merge_keys + [
        'AvgORTG_x', 'AvgDRTG_x', 'AvgNET_x', 'HomeORTG_x', 'AwayORTG_x',
        'HomeDRTG_x', 'AwayDRTG_x', 'Location_Avg_ORTG_x',
        'Location_Avg_DRTG_x', 'AvgPace_x', 'Avg_Possessions_x',
        'est_avg_Poss_x', 'AvgPTS_x',
        'vs_AvgORTG_x', 'vs_AvgDRTG_x', 'vs_AvgNET_x', 'vs_HomeORTG_x',
        'vs_AwayORTG_x', 'vs_HomeDRTG_x', 'vs_AwayDRTG_x', 'vs_AvgPace_x',
        'vs_est_avg_Poss_x', 'vs_Avg_Possessions_x',
    ]

    # Slice every group up front so a missing column fails before any merge.
    pieces = [
        df[cols]
        for cols in (base_cols, three_pt_cols, paint_cols, general_cols,
                     rating_cols)
    ]

    # Left-to-right chain of default (inner) merges on the two key columns.
    combined = pieces[0]
    for piece in pieces[1:]:
        combined = pd.merge(combined, piece, on=merge_keys)

    workbook = 'DataForModel_' + year + '.xlsx'
    saveToExcel(combined, workbook, 'Master')
    return combined
예제 #2
0
파일: Predict.py 프로젝트: popo9192/NBA
def getResults(ActualDF, ProjectionsDF, Period):
    """Join projections with actual scores, grade each bet and save the result.

    Args:
        ActualDF: frame providing ``GAMECODE``, ``TEAM_ABBREVIATION`` and
            ``PTS`` columns.
        ProjectionsDF: frame keyed by ``GAMECODE_x`` and
            ``TEAM_ABBREVIATION_x_x``.
        Period: prefix of the output workbook (``<Period>_Results.xlsx``).

    Returns:
        The merged frame with ``ActualSpread``, ``ActualOU``, ``ActualLines``,
        ``BetType`` and ``Correct`` columns added, excluding rows whose
        ``Correct`` value is ``'Push'``.
    """
    # Rename the actual-score keys to the projection key names. ``rename``
    # returns a new frame, so nothing is assigned onto a slice of ActualDF
    # (the original column assignments raised SettingWithCopyWarning).
    actual = ActualDF[['GAMECODE', 'TEAM_ABBREVIATION', 'PTS']].rename(
        columns={
            'GAMECODE': 'GAMECODE_x',
            'TEAM_ABBREVIATION': 'TEAM_ABBREVIATION_x_x',
        })

    merged = pd.merge(ProjectionsDF,
                      actual,
                      on=['GAMECODE_x', 'TEAM_ABBREVIATION_x_x'],
                      how='outer')
    # Drops every row with a missing value in any column, which includes
    # rows that exist on only one side of the outer merge.
    merged = merged.dropna()

    # NOTE(review): shift() pairs each row with its physical neighbour;
    # this presumably relies on the two teams of a game being adjacent rows.
    merged['ActualSpread'] = merged['PTS'].shift(1) - merged['PTS']
    merged['ActualOU'] = merged['PTS'].shift(-1) + merged['PTS']

    merged['ActualLines'] = merged.apply(getActualLines, axis=1)
    merged['BetType'] = merged.apply(getBetType, axis=1)
    merged['Correct'] = merged.apply(isBetCorrect, axis=1)
    merged = merged.loc[merged['Correct'] != 'Push']
    print('Results Found')

    saveToExcel(merged, Period + '_Results.xlsx', 'Master')
    return merged
예제 #3
0
파일: RTGModel.py 프로젝트: popo9192/NBA
def _loadBacktestFrames(prefix, year, both):
    """Load ``<prefix><year>.xlsx``, or the 2015 and 2016 workbooks stacked."""
    if both:
        return pd.concat(
            [getDataSet(prefix + season + '.xlsx')
             for season in ('2015', '2016')])
    return getDataSet(prefix + year + '.xlsx')


def backtest(year, both):
    """Train the model, run it against historical odds and save a summary.

    Args:
        year: season string used in the workbook names when ``both`` is
            falsy, and as the final argument to ``saveToExcel``.
        both: when truthy, the 2015 and 2016 workbooks are concatenated and
            used instead of the single ``year`` workbook.

    The summary produced by ``getResultSummary`` is written to
    ``BackTest_Summary.xlsx``. Nothing is returned.
    """
    # The single-season workbook is no longer read when ``both`` is set;
    # previously it was loaded and then immediately overwritten.
    odds = _loadBacktestFrames('Historical_Odds_', year, both)
    df = _loadBacktestFrames('DataForModel_', year, both)
    actual = _loadBacktestFrames('AllStats_', year, both)

    # Regression is called only for its side effects; its result is unused.
    Regression(df)
    proj = RunModels(df, odds)
    results = getResults(actual, proj, 'Backtest')
    dfb = getResultSummary(results)

    saveToExcel(dfb, 'BackTest_Summary.xlsx', 'Both' if both else year)
예제 #4
0
파일: Predict.py 프로젝트: popo9192/NBA
def getAllOdds(year):
    """Fetch odds for dates not yet in the historical odds workbook.

    Game dates are read from ``DataForModel_<year>.xlsx``. Every date that
    compares greater than the last date already present in
    ``Historical_Odds_<year>.xlsx`` is fetched through ``main``, reduced to
    the game code / away team / Vegas line columns, and appended to the
    historical frame.

    Args:
        year: season string used in both workbook names.

    Nothing is returned; the historical workbook is updated on disk.
    """
    model_data = getDataSet('DataForModel_' + year + '.xlsx')
    dates = model_data.GAME_DATE_EST_x_x.unique()
    dfall = getDataSet('Historical_Odds_' + year + '.xlsx')
    done = dfall.GAME_DATE_EST_x.unique()

    # NOTE(review): "last" means the last unique value in file order, so this
    # assumes the historical workbook is sorted by date.
    # With no history at all, every date is treated as missing instead of
    # failing on done[-1].
    last_done = done[-1] if len(done) else None

    for game_date in dates:
        if last_done is not None and not game_date > last_done:
            continue

        # The first ten characters with the dashes removed form the date key
        # passed to main(); presumably YYYYMMDD, confirm against the data.
        date_key = game_date[:10].replace("-", "")
        print(date_key)

        odds = main(date_key)
        odds['AwayTeam'] = odds.apply(getTeams, axis=1)
        odds['HomeTeam'] = odds.apply(getOppTeams, axis=1)
        odds['key'] = odds['key'].astype(str)
        odds['GAMECODE_x'] = odds.apply(getGameCodeODDS, axis=1)
        odds['VegasLines'] = odds.apply(filterOdds, axis=1)
        odds['TEAM_ABBREVIATION_x_x'] = odds['AwayTeam']

        day_odds = pd.DataFrame({
            'GAMECODE_x': odds['GAMECODE_x'],
            'TEAM_ABBREVIATION_x_x': odds['TEAM_ABBREVIATION_x_x'],
            'VegasLines': odds['VegasLines'],
            'GAME_DATE_EST_x': game_date,
        })

        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        dfall = pd.concat([dfall, day_odds])
        # Save after each new date so earlier dates survive a later failure,
        # but skip the rewrite for dates that added nothing.
        saveToExcel(dfall, 'Historical_Odds_' + year + '.xlsx', 'Master')
예제 #5
0
파일: 3Pt_Model.py 프로젝트: popo9192/NBA
def RunModels(df, odds):
    """Predict with the saved 3PT model and compare against Vegas lines.

    Args:
        df: model input frame passed to ``buildModel``.
        odds: frame keyed by ``GAMECODE_x`` and ``TEAM_ABBREVIATION_x_x``
            that provides a ``VegasLines`` column.

    Returns:
        Frame with the key columns plus ``Predicted``, ``CalculatedLines``,
        ``VegasLines``, ``Difference`` and ``BetGrade``. The same frame is
        written to ``BackTest_Data.xlsx``.
    """
    # Only the combined feature frame and its identifier frame are used.
    dfx, dfy, _dfh, _dfa, _yh, _ya = buildModel(df)

    ThreeModel = LoadModel('3PT_Model.sav')
    threePred = ThreeModel.predict(dfx.values)

    predictions = pd.DataFrame({
        'GAMECODE_x': dfy['GAMECODE'],
        'TEAM_ABBREVIATION_x_x': dfy['TEAM_ABBREVIATION_x_x'],
        'Predicted': threePred
    })
    df3 = pd.merge(predictions,
                   odds,
                   on=['GAMECODE_x', 'TEAM_ABBREVIATION_x_x'],
                   how='left')

    # NOTE(review): shift() pairs each row with its physical neighbour;
    # this presumably relies on the two teams of a game being adjacent rows.
    df3['CalculatedSpread'] = df3['Predicted'].shift(1) - df3['Predicted']
    df3['CalculatedOU'] = df3['Predicted'].shift(-1) + df3['Predicted']
    df3['CalculatedLines'] = df3.apply(getCalcedLines, axis=1)

    # Copy the column subset so the assignments below target an independent
    # frame rather than a slice (avoids SettingWithCopyWarning).
    df3 = df3[[
        'GAMECODE_x', 'TEAM_ABBREVIATION_x_x', 'Predicted', 'CalculatedLines',
        'VegasLines'
    ]].copy()
    df3['Difference'] = df3['CalculatedLines'] - df3['VegasLines']
    df3['BetGrade'] = df3.apply(gradeBet, axis=1)

    saveToExcel(df3, 'BackTest_Data.xlsx', 'Master')
    return df3
예제 #6
0
파일: Predict.py 프로젝트: popo9192/NBA
def GetYesterdaysData():
    """Fetch yesterday's stats and add them to the season workbook.

    Reads the module-level ``yesterday`` and ``year``. The fetched stats are
    written to ``yesterdaysGames.xlsx`` and read back, then stacked under the
    contents of ``AllStats_<year>.xlsx``. The season workbook is rewritten
    only when the first nine characters of the first new ``GAMECODE`` differ
    from those of the last stored ``GAMECODE``.

    Returns:
        The season frame with yesterday's rows appended.
        NOTE(review): the rows are appended to the returned frame even when
        the save is skipped, so the result may contain duplicates; confirm
        callers expect that.
    """
    yesterdaysGames = getGames(yesterday)
    fresh = getADVStats(yesterdaysGames)
    saveToExcel(fresh, 'yesterdaysGames.xlsx', 'Master')

    season = getDataSet('AllStats_' + year + '.xlsx')
    latest = getDataSet('yesterdaysGames.xlsx')

    # Compare scalar prefixes instead of one-element numpy arrays; the array
    # form is ambiguous (or an error) when either frame is empty.
    last_saved = season['GAMECODE'].str[:9].iloc[-1] if len(season) else None
    first_new = latest['GAMECODE'].str[:9].iloc[0] if len(latest) else None

    # DataFrame.append was removed in pandas 2.0; concat is equivalent.
    combined = pd.concat([season, latest])

    if first_new is not None and first_new != last_saved:
        saveToExcel(combined, 'AllStats_' + year + '.xlsx', 'Master')
    return combined
예제 #7
0
파일: RTGModel.py 프로젝트: popo9192/NBA
def RunModels(df, odds):
    """Predict with the saved home/away models and compare to Vegas lines.

    Args:
        df: model input frame passed to ``buildModel``.
        odds: frame keyed by ``GAMECODE_x`` and ``TEAM_ABBREVIATION_x_x``
            that provides a ``VegasLines`` column.

    Returns:
        Frame with the key columns plus ``Predicted``, ``CalculatedLines``,
        ``VegasLines``, ``Difference`` and ``BetGrade``. The same frame is
        written to ``BackTest_Data.xlsx``.
    """
    # The combined (non-split) frames are not used by this model.
    _dfx, _dfy, dfh, dfa, yh, ya = buildModel(df)

    home_model = LoadModel('Backtest_Home_Model.sav')
    away_model = LoadModel('Backtest_Away_Model.sav')

    homepred = home_model.predict(dfh.values)
    awaypred = away_model.predict(dfa.values)

    dfh1 = pd.DataFrame({
        'GAMECODE_x': yh['GAMECODE'],
        'TEAM_ABBREVIATION_x_x': yh['TEAM_ABBREVIATION_x_x'],
        'Predicted': homepred
    })
    dfa1 = pd.DataFrame({
        'GAMECODE_x': ya['GAMECODE'],
        'TEAM_ABBREVIATION_x_x': ya['TEAM_ABBREVIATION_x_x'],
        'Predicted': awaypred
    })
    # DataFrame.append was removed in pandas 2.0; concat is equivalent.
    df2 = pd.concat([dfh1, dfa1])
    df3 = pd.merge(df2,
                   odds,
                   on=['GAMECODE_x', 'TEAM_ABBREVIATION_x_x'],
                   how='left')

    # NOTE(review): shift() pairs each row with its physical neighbour, yet
    # the home predictions are stacked before the away predictions above;
    # confirm getCalcedLines expects that layout.
    df3['CalculatedSpread'] = df3['Predicted'].shift(1) - df3['Predicted']
    df3['CalculatedOU'] = df3['Predicted'].shift(-1) + df3['Predicted']
    df3['CalculatedLines'] = df3.apply(getCalcedLines, axis=1)

    # Copy the column subset so the assignments below target an independent
    # frame rather than a slice (avoids SettingWithCopyWarning).
    df3 = df3[[
        'GAMECODE_x', 'TEAM_ABBREVIATION_x_x', 'Predicted', 'CalculatedLines',
        'VegasLines'
    ]].copy()
    df3['Difference'] = df3['CalculatedLines'] - df3['VegasLines']
    df3['BetGrade'] = df3.apply(gradeBet, axis=1)

    saveToExcel(df3, 'BackTest_Data.xlsx', 'Master')
    return df3
예제 #8
0
파일: Predict.py 프로젝트: popo9192/NBA
def GetOdds(year='2017'):
    """Return the historical odds, extended with today's odds when enabled.

    Args:
        year: season string selecting ``Historical_Odds_<year>.xlsx``.
            Defaults to ``'2017'``, the value that was previously hard-coded.

    Returns:
        The historical odds frame. When the module-level ``fetchOdds`` flag
        is truthy, today's odds are scraped, reduced to the game code / away
        team / Vegas line columns, saved to ``Todays_Odds.xlsx``, appended to
        the returned frame and saved to ``Season_Odds.xlsx``.
    """
    df1 = getDataSet('Historical_Odds_' + year + '.xlsx')
    if fetchOdds:
        scrapeOdds()
        df = getDataSet('Todays_Odds.xlsx')
        df['AwayTeam'] = df.apply(getTeams, axis=1)
        df['HomeTeam'] = df.apply(getOppTeams, axis=1)
        df['key'] = df['key'].astype(str)
        df['GAMECODE_x'] = df.apply(getGameCodeODDS, axis=1)
        df['VegasLines'] = df.apply(filterOdds, axis=1)
        df['TEAM_ABBREVIATION_x_x'] = df['AwayTeam']
        df = df[['GAMECODE_x', 'TEAM_ABBREVIATION_x_x', 'VegasLines']]
        saveToExcel(df, 'Todays_Odds.xlsx', 'Master')
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        df1 = pd.concat([df1, df])
        saveToExcel(df1, 'Season_Odds.xlsx', 'Master')

    return df1
예제 #9
0
파일: getNBAData.py 프로젝트: popo9192/NBA
        json_inp = getStat(e, params)
        df = _api_scrape(json_inp, ndx)
        if e == 'boxscoresummaryv2':
            df = df[['GAME_DATE_EST', 'GAMECODE']]
        df1 = pd.concat([df1, df], axis=1)
    # print(df1)
    return (df1)


def getADVStats(gameList, endpoints, params, ndx):
    """Collect the endpoint stats for every game into one frame.

    Args:
        gameList: iterable of game ids.
        endpoints: passed through to ``hitEndpoints``.
        params: request parameter dict. Its ``'GameID'`` entry is overwritten
            in place for each game, so the caller's dict is left holding the
            last game id.
        ndx: passed through to ``hitEndpoints``.

    Returns:
        The per-game frames stacked row-wise and forward-filled. An empty
        frame is returned when ``gameList`` is empty.
    """
    frames = []
    for game_id in gameList:
        print(game_id)
        params['GameID'] = game_id
        frames.append(hitEndpoints(endpoints, params, ndx))

    # Concatenate once rather than re-copying the accumulated frame on each
    # iteration; pd.concat rejects an empty list, hence the guard.
    stats = pd.concat(frames, axis=0) if frames else pd.DataFrame()
    # fillna(method='ffill') is deprecated; ffill() is the direct equivalent.
    stats = stats.ffill()
    print('Stats Compiled')
    return stats


# Script section: runs at import time. Fetches the stats for every game
# returned by getAllGames(year) and writes them to AllStats_<year>.xlsx.
# `endpoints`, `params` and `ndx` are expected to be defined earlier in the
# file (not visible in this excerpt).
year = "2017"
gameList = getAllGames(year)
# gameList = gameList[:5]
df = getADVStats(gameList, endpoints, params, ndx)
# The third argument here is the season string; presumably the sheet name,
# confirm against saveToExcel.
saveToExcel(df, "AllStats_" + year + ".xlsx", year)

# print(params)
예제 #10
0
파일: 3Pt_Model.py 프로젝트: popo9192/NBA
def Regression(df):
    """Fit a linear regression on the combined data set and persist it.

    The feature and identifier/target frames come from ``buildModel``. In
    the identifier/target frame, columns 0 and 1 are reported as
    ``GAMECODE`` and ``TEAM_ABBREVIATION_x_x``, and column 2 is the value
    being fitted. A 75/25 train/test split (``random_state=0``) is used.

    Side effects:
        * per-row test errors are written to ``Model_Results.xlsx``;
        * the mean absolute and mean squared test errors are printed;
        * the fitted regressor is pickled to ``3PT_Model.sav``.

    Args:
        df: model input frame passed to ``buildModel``.

    Nothing is returned.
    """
    # The home/away split frames are not used by this combined model.
    features, targets, _dfh, _dfa, _yh, _ya = buildModel(df)
    x = features.values
    y = targets.values

    # ------------- Split train and test data -------------
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=0)
    # Keep the full test rows so the identifier columns can be reported
    # next to the predictions.
    y_compare = y_test
    y_train = y_train[:, 2]
    y_test = y_test[:, 2]

    # ------------- Fit and predict (linear regression) -------------
    regressor = LinearRegression()
    regressor.fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    # ------------- Output -------------
    df1 = pd.DataFrame({
        'GAMECODE': y_compare[:, 0],
        'TEAM_ABBREVIATION_x_x': y_compare[:, 1],
        'Actual': y_test,
        'Predicted': y_pred
    })
    df1 = df1.sort_values(by=['GAMECODE'], ascending=[True])
    # Despite the column names, these hold the per-row absolute and squared
    # errors; the means are only taken when printing below.
    df1['Mean_Avg_Err'] = (df1['Predicted'] - df1['Actual']).abs()
    df1['Mean_SQ_Err'] = df1['Mean_Avg_Err'] * df1['Mean_Avg_Err']
    df1['Last_10_Avg_Err'] = df1['Mean_Avg_Err'].rolling(window=10).mean()
    df1['Last_10_SQ_Err'] = df1['Mean_SQ_Err'].rolling(window=10).mean()
    saveToExcel(df1, 'Model_Results.xlsx', 'Master')

    print('MAE:', df1['Mean_Avg_Err'].mean())
    print('MSE:', df1['Mean_SQ_Err'].mean())

    ThreePTModelFile = '3PT_Model.sav'
    # Use a context manager so the file handle is closed (and flushed) even
    # if pickling fails.
    with open(ThreePTModelFile, 'wb') as model_file:
        pickle.dump(regressor, model_file)