Example #1
0
    def betting_stats(self, stat_names=None, window=None):
        """Join betting lines to windowed home- and away-team statistics.

        Derived rating columns and two weighted four-factor composites are
        added to the frame returned by ``self.game_stats()``. The selected
        columns are then passed through ``self.windowed_stats`` and merged
        onto the ``games``/``betting`` join, once keyed on the home team and
        once keyed on the away team.

        Args:
            stat_names: Column names to keep and hand to
                ``self.windowed_stats``. When ``None``, a default list of
                ``TEAM_``/``OPP_`` box-score columns plus the derived
                columns computed here is used.
            window: Forwarded unchanged to ``self.windowed_stats``.

        Returns:
            A DataFrame of the merged rows. Columns from the away-team merge
            that collide with existing names carry an ``_AWAY`` suffix. Rows
            whose ``HOME_SPREAD_WL`` equals ``'P'`` are excluded.
        """
        frame = self.game_stats()

        # The helper calls stay in this order: later helpers may read columns
        # written by earlier ones (cannot be verified from this method alone).
        frame['PACE'] = team_stats.pace(frame)
        frame['POSSESSIONS'] = team_stats.possessions(frame)
        frame['TEAM_OFF_RTG'] = team_stats.off_rating(frame)
        frame['TEAM_DEF_RTG'] = team_stats.def_rating(frame)
        frame['TEAM_NET_RTG'] = frame['TEAM_OFF_RTG'] - frame['TEAM_DEF_RTG']
        frame['TEAM_EFG'] = stats.eff_fg_pct(frame, 'TEAM_')
        frame['TEAM_TOV_PCT'] = stats.tov_pct(frame, 'TEAM_')
        frame['TEAM_OREB_PCT'] = team_stats.oreb_pct(frame)
        frame['TEAM_DREB_PCT'] = team_stats.dreb_pct(frame)
        frame['TEAM_FT_PER_FGA'] = stats.ft_per_fga(frame, 'TEAM_')

        shooting = frame['TEAM_EFG']
        off_boards = frame['TEAM_OREB_PCT']
        def_boards = frame['TEAM_DREB_PCT']
        free_throws = frame['TEAM_FT_PER_FGA']
        turnovers = frame['TEAM_TOV_PCT']

        # Weighted composites; the second variant splits the rebounding
        # weight evenly between offensive and defensive rebounding.
        frame['TEAM_FOUR_FACTORS'] = (
            0.4 * shooting + 0.2 * off_boards + 0.15 * free_throws
            - 0.25 * turnovers
        )
        frame['TEAM_FOUR_FACTORS_REB'] = (
            0.4 * shooting + 0.1 * off_boards + 0.1 * def_boards
            + 0.15 * free_throws - 0.25 * turnovers
        )

        if stat_names is None:
            box_score = (
                'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'OREB', 'DREB',
                'REB', 'AST', 'TOV', 'STL', 'BLK',
            )
            derived = [
                'TEAM_OFF_RTG', 'TEAM_DEF_RTG', 'TEAM_NET_RTG', 'TEAM_EFG',
                'TEAM_TOV_PCT', 'TEAM_OREB_PCT', 'TEAM_DREB_PCT',
                'TEAM_FT_PER_FGA', 'TEAM_FOUR_FACTORS',
                'TEAM_FOUR_FACTORS_REB', 'PACE', 'POSSESSIONS',
            ]
            # All TEAM_ columns first, then all OPP_ columns, then derived.
            stat_names = [
                prefix + stat
                for prefix in ('TEAM_', 'OPP_')
                for stat in box_score
            ] + derived

        stat_keys = ['SEASON', 'GAME_ID', 'TEAM_ID']
        frame = frame[stat_keys + stat_names]
        frame = self.windowed_stats(frame, stat_names, window=window)

        games = pd.read_sql(
            'SELECT * FROM games JOIN betting ON games.ID is betting.GAME_ID',
            self.__conn)

        # Home-team stats keep their plain names (default merge suffixes);
        # the away-team copy of each colliding column gets '_AWAY'.
        with_home = pd.merge(games, frame,
                             left_on=['SEASON', 'ID', 'HOME_TEAM_ID'],
                             right_on=stat_keys)
        with_both = pd.merge(with_home, frame,
                             left_on=['SEASON', 'ID', 'AWAY_TEAM_ID'],
                             right_on=stat_keys,
                             suffixes=('', '_AWAY'))

        # NOTE(review): 'P' presumably marks a pushed spread - confirm.
        return with_both[with_both['HOME_SPREAD_WL'] != 'P']
Example #2
0
    def season_stats(self):
        """Return per-season, per-team averages with derived ratings.

        Box-score columns from ``self.__game_query`` are averaged per
        ``(SEASON, TEAM_ID)``. Pace, possessions, offensive/defensive/net
        rating, the four-factor inputs and two weighted four-factor
        composites are then added. Finally a schedule-adjusted rating,
        ``TEAM_SRS``, is computed for every season.

        ``TEAM_SRS`` starts from each team's average ``TEAM_PLUS_MINUS`` and
        repeatedly adds the games-weighted ratings of its opponents, for a
        fixed 10 iterations. The schedule weights are games played against
        each opponent divided by the season's mean games per team.

        Teams are matched by id, not by row position, so the result does not
        depend on the row order returned by either SQL query. Schedule rows
        that name a team with no averaged row in that season are skipped.

        Returns:
            A DataFrame with one row per ``(SEASON, TEAM_ID)`` holding the
            averaged columns, the derived columns and ``TEAM_SRS``.
        """
        query = '''
            SELECT SEASON,
                   TEAM_ID,
                   AVG(TEAM_MIN) AS TEAM_MIN,
                   AVG(TEAM_FGM) AS TEAM_FGM,
                   AVG(TEAM_FGA) AS TEAM_FGA,
                   AVG(TEAM_FG3M) AS TEAM_FG3M,
                   AVG(TEAM_FG3A) AS TEAM_FG3A,
                   AVG(TEAM_FTM) AS TEAM_FTM,
                   AVG(TEAM_FTA) AS TEAM_FTA,
                   AVG(TEAM_OREB) AS TEAM_OREB,
                   AVG(TEAM_DREB) AS TEAM_DREB,
                   AVG(TEAM_REB) AS TEAM_REB,
                   AVG(TEAM_AST) AS TEAM_AST,
                   AVG(TEAM_TOV) AS TEAM_TOV,
                   AVG(TEAM_STL) AS TEAM_STL,
                   AVG(TEAM_BLK) AS TEAM_BLK,
                   AVG(TEAM_PTS) AS TEAM_PTS,
                   AVG(TEAM_PLUS_MINUS) AS TEAM_PLUS_MINUS,
                   AVG(OPP_MIN) AS OPP_MIN,
                   AVG(OPP_FGM) AS OPP_FGM,
                   AVG(OPP_FGA) AS OPP_FGA,
                   AVG(OPP_FG3M) AS OPP_FG3M,
                   AVG(OPP_FG3A) AS OPP_FG3A,
                   AVG(OPP_FTM) AS OPP_FTM,
                   AVG(OPP_FTA) AS OPP_FTA,
                   AVG(OPP_OREB) AS OPP_OREB,
                   AVG(OPP_DREB) AS OPP_DREB,
                   AVG(OPP_REB) AS OPP_REB,
                   AVG(OPP_AST) AS OPP_AST,
                   AVG(OPP_TOV) AS OPP_TOV,
                   AVG(OPP_STL) AS OPP_STL,
                   AVG(OPP_BLK) AS OPP_BLK,
                   AVG(OPP_PTS) AS OPP_PTS,
                   AVG(OPP_PLUS_MINUS) AS OPP_PLUS_MINUS
            FROM
                ({})
            GROUP BY SEASON, TEAM_ID
        '''.format(self.__game_query)

        data = pd.read_sql(query, self.__conn)

        # Keep this call order: later helpers may read columns written by
        # earlier ones (cannot be verified from this method alone).
        data['PACE'] = team_stats.pace(data)
        data['POSSESSIONS'] = team_stats.possessions(data)
        data['TEAM_OFF_RTG'] = team_stats.off_rating(data)
        data['TEAM_DEF_RTG'] = team_stats.def_rating(data)
        data['TEAM_NET_RTG'] = data['TEAM_OFF_RTG'] - data['TEAM_DEF_RTG']
        data['TEAM_EFG'] = stats.eff_fg_pct(data, 'TEAM_')
        data['TEAM_TOV_PCT'] = stats.tov_pct(data, 'TEAM_')
        data['TEAM_OREB_PCT'] = team_stats.oreb_pct(data)
        data['TEAM_DREB_PCT'] = team_stats.dreb_pct(data)
        data['TEAM_FT_PER_FGA'] = stats.ft_per_fga(data, 'TEAM_')

        efg = data.TEAM_EFG
        oreb = data.TEAM_OREB_PCT
        dreb = data.TEAM_DREB_PCT
        ftr = data.TEAM_FT_PER_FGA
        tov = data.TEAM_TOV_PCT

        # Weighted composites; the second variant splits the rebounding
        # weight evenly between offensive and defensive rebounding.
        data['TEAM_FOUR_FACTORS'] = (
            0.4 * efg + 0.2 * oreb + 0.15 * ftr - 0.25 * tov
        )
        data['TEAM_FOUR_FACTORS_REB'] = (
            0.4 * efg + 0.1 * oreb + 0.1 * dreb + 0.15 * ftr - 0.25 * tov
        )

        query = '''
            SELECT SEASON, TEAM_ID, OPP_ID, COUNT(OPP_ID) AS GAMES_PLAYED
            FROM
                ({})
            GROUP BY SEASON, TEAM_ID, OPP_ID
        '''.format(self.__game_query)

        opponents = pd.read_sql(query, self.__conn)

        for season in pd.unique(data.SEASON):
            in_season = data.SEASON == season
            season_opponents = opponents[opponents.SEASON == season]

            # Take the team order from `data` itself so that schedule rows,
            # the point-differential vector and the rows written back below
            # all share one ordering, whatever order the queries returned.
            teams = pd.Index(data.loc[in_season, 'TEAM_ID'])
            n_teams = len(teams)

            # Map every (team, opponent) pair to matrix coordinates by id;
            # get_indexer yields -1 for ids that have no row in `teams`.
            rows = teams.get_indexer(season_opponents.TEAM_ID)
            cols = teams.get_indexer(season_opponents.OPP_ID)
            known = (rows >= 0) & (cols >= 0)

            schedule = np.zeros([n_teams, n_teams])
            schedule[rows[known], cols[known]] = (
                season_opponents.GAMES_PLAYED.values[known]
            )

            # Normalise by the mean number of games per team in the season.
            schedule /= season_opponents.GAMES_PLAYED.sum() / n_teams

            point_diff = data.loc[in_season, 'TEAM_PLUS_MINUS'].values
            srs = point_diff

            # Fixed-count iteration: rating = own margin + weighted
            # opponents' ratings from the previous pass.
            for _ in range(10):
                srs = point_diff + schedule.dot(srs)

            data.loc[in_season, 'TEAM_SRS'] = srs

        return data