Esempio n. 1
0
def get_viz_data_for_team_season(team_abbreviation):
    """Build and save the rotation-visualisation CSV for one team.

    Fetches the team game log (queried with ``LastNGames='3'``), keeps the rows
    for ``team_abbreviation``, downloads the play-by-play of each remaining
    game, turns it into per-player stints and writes the transformed result to
    ``season_file_path``.

    Relies on the module-level names ``season`` and ``season_file_path``.

    Args:
        team_abbreviation: Value matched against the ``TEAM_ABBREVIATION``
            column of the game log and forwarded to the stint builder.
    """
    log = TeamAdvancedGameLogs().get_data({'Season': season, 'LastNGames': '3'}, override_file=True)
    log = log[log['TEAM_ABBREVIATION'] == team_abbreviation]

    pbp_ep = PlayByPlay()

    game_stint_frames = []
    for game in log.GAME_ID.tolist():
        game = str(game)
        # Ids shorter than 10 characters get the '00' prefix restored
        # (presumably the leading zeros were dropped when the id was parsed as a number).
        if len(game) < 10:
            game = '00' + game

        pbp_df = pbp_ep.get_data({'Season': season, 'GameID': game})
        pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])

        game_stint_frames.append(get_game_player_stints_for_team(pbp_df, team_abbreviation))

    # DataFrame.append was removed in pandas 2.0; collect the frames and concatenate once.
    season_player_stints_df = pd.concat(game_stint_frames) if game_stint_frames else pd.DataFrame()

    rotation_data = transform_stints_for_viz(season_player_stints_df)

    # Order players by total 'time', largest first.
    players = season_player_stints_df['player'].unique()
    players = sorted(players,
                     key=lambda x: -season_player_stints_df[season_player_stints_df['player'] == x]['time'].sum())

    rotation_data['pindex'] = 0
    for index, player in enumerate(players, start=1):
        sys.stdout.write("\"" + player + "\",")
        # .loc writes into rotation_data itself; the chained form
        # `rotation_data.pindex[cond] = ...` may silently write to a copy.
        rotation_data.loc[rotation_data.player == player, 'pindex'] = index

    file_check(season_file_path)
    rotation_data.to_csv(season_file_path)
Esempio n. 2
0
def get_rpm():
    """Return the ESPN RPM table, cached in one CSV per calendar day.

    When ``file_check`` reports today's file as present it is read back with
    ``pd.read_csv``. Otherwise pages 1 through 11 of the ESPN RPM listing are
    requested, every ``<tr>`` after the first on each page becomes a data row,
    and the first ``<tr>`` of page 1 supplies the column names.

    Returns:
        pandas.DataFrame: the table with ``NAME`` split into ``NAME`` and
        ``POS`` and with ``RPM``/``MPG`` as float and ``GP`` as int (freshly
        scraped case), or the cached CSV contents.
    """
    file_path = data_dir + 'RPM/' + str(datetime.date.today()) + '.csv'

    if file_check(file_path):
        return pd.read_csv(file_path)

    rpm_url = 'http://www.espn.com/nba/statistics/rpm/_/page/{page}/sort/RPM'
    headers = []
    data = []
    for p in range(1, 12):
        response = requests.post(rpm_url.format(page=p))
        soup = BeautifulSoup(response.content)
        table_rows = soup.find_all('tr')
        if p == 1:
            headers = [cell.getText() for cell in table_rows[0].find_all('td')]
        for row in table_rows[1:]:
            data.append([cell.getText() for cell in row.find_all('td')])

    df = pd.DataFrame(data, columns=headers)
    # NAME cells look like "<name>,<position>"; split them into two columns.
    df['POS'] = df.NAME.apply(lambda full: full.split(',')[1])
    df.NAME = df.NAME.apply(lambda full: full.split(',')[0])
    df.RPM = df.RPM.astype(float)
    df.MPG = df.MPG.astype(float)
    df.GP = df.GP.astype(int)
    df.to_csv(file_path)
    return df
Esempio n. 3
0
def merge_shot_pbp_for_season(season, season_type='Regular Season', override_file=False):
    """Merge play-by-play rows with shot-chart rows for every game of a season.

    The result is cached at ``data_dir + 'merged_shot_pbp/' + season + '.csv'``.
    The cache is read back unless ``override_file`` is true or ``file_check``
    reports the file as missing.

    Args:
        season: Season string forwarded to the endpoints and used in the
            cache file name.
        season_type: Forwarded to the endpoints as ``SeasonType``.
        override_file: When true, rebuild the merge even if a cached file exists.

    Returns:
        pandas.DataFrame: inner merge of play-by-play and shot data matched on
        event number, game id and period (or the cached CSV contents).
    """
    file_path = data_dir + 'merged_shot_pbp/' + season + '.csv'

    if not override_file and file_check(file_path):
        return pd.read_csv(file_path)

    play_by_play_endpoint = PlayByPlay()
    shot_endpoint = ShotChartDetail()

    log = TeamAdvancedGameLogs().get_data({'Season': season, 'SeasonType': season_type}, override_file=True)

    game_frames = []
    for g in log.GAME_ID.unique():
        if len(str(g)) < 10:
            g = '00' + str(g)
        game_frames.append(
            play_by_play_endpoint.get_data({'GameID': g, 'Season': season, 'SeasonType': season_type}))

    # DataFrame.append was removed in pandas 2.0; concatenate the per-game frames once.
    pbp_df = pd.concat(game_frames) if game_frames else pd.DataFrame()

    # Pad ids to 10 characters. An unconditional '00' prefix would turn an id
    # that already carries its leading zeros into a 12-character string.
    pbp_df['GAME_ID'] = pbp_df['GAME_ID'].astype(str).str.zfill(10)
    shots_df = shot_endpoint.get_data({'Season': season, 'SeasonType': season_type}, override_file=True)
    merge_df = pd.merge(pbp_df, shots_df, left_on=['EVENTNUM', 'GAME_ID', 'PERIOD'],
                        right_on=['GAME_EVENT_ID', 'GAME_ID', 'PERIOD'])

    merge_df.to_csv(file_path)
    return merge_df
Esempio n. 4
0
def merge_shot_pbp_for_season(season, season_type='Regular Season', override_file=False):
    """Join a season's play-by-play events to its shot-chart records.

    A cached CSV under ``data_dir + 'merged_shot_pbp/'`` named after the season
    is returned when it exists (per ``file_check``) and ``override_file`` is
    false; otherwise the data is fetched, merged, written to that CSV and
    returned.

    Args:
        season: Season string; passed to the endpoints and used as file name.
        season_type: Value sent as ``SeasonType`` to the endpoints.
        override_file: Force a rebuild of the cached CSV.

    Returns:
        pandas.DataFrame: rows present in both sources, matched on
        ``EVENTNUM``/``GAME_EVENT_ID``, ``GAME_ID`` and ``PERIOD``.
    """
    file_path = data_dir + 'merged_shot_pbp/' + season + '.csv'

    if override_file or not file_check(file_path):
        play_by_play_endpoint = PlayByPlay()
        shot_endpoint = ShotChartDetail()

        log = TeamAdvancedGameLogs().get_data({'Season': season, 'SeasonType': season_type}, override_file=True)
        games = log.GAME_ID.unique()

        # Collect per-game frames and concatenate once: DataFrame.append no
        # longer exists in pandas 2.0 and copied the whole frame on every call.
        per_game = []
        for g in games:
            if len(str(g)) < 10:
                g = '00' + str(g)
            per_game.append(
                play_by_play_endpoint.get_data({'GameID': g, 'Season': season, 'SeasonType': season_type}))
        pbp_df = pd.concat(per_game) if per_game else pd.DataFrame()

        # zfill(10) restores missing leading zeros without double-prefixing
        # ids that are already 10 characters long.
        pbp_df['GAME_ID'] = pbp_df['GAME_ID'].astype(str).str.zfill(10)
        shots_df = shot_endpoint.get_data({'Season': season, 'SeasonType': season_type}, override_file=True)
        merge_df = pd.merge(pbp_df, shots_df, left_on=['EVENTNUM', 'GAME_ID', 'PERIOD'],
                            right_on=['GAME_EVENT_ID', 'GAME_ID', 'PERIOD'])

        merge_df.to_csv(file_path)
        return merge_df
    else:
        return pd.read_csv(file_path)
Esempio n. 5
0
def get_rotation_data_for_game(game_id,
                               year='2017-18',
                               single_game_file_path='./single_game/'):
    """Write rotation and score CSVs for a single game.

    Downloads the game's play-by-play, builds per-player stints for each team
    found in ``PLAYER1_TEAM_ABBREVIATION`` and writes the combined rotation
    data to ``<single_game_file_path>data.csv``. The frame returned by
    ``get_score_data_for_game`` is written to ``<single_game_file_path>score.csv``.

    Args:
        game_id: Game identifier; converted to ``str`` and prefixed with
            ``'00'`` when shorter than 10 characters.
        year: Season string sent as ``Season`` to the play-by-play endpoint.
        single_game_file_path: Directory prefix (with trailing separator) for
            the two output files.
    """
    pbp_ep = PlayByPlay()

    game_id = str(game_id)
    if len(game_id) < 10:
        game_id = '00' + game_id

    pbp_df = pbp_ep.get_data({'Season': year, 'GameID': game_id})
    pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])

    # The first unique value is skipped.
    # NOTE(review): presumably it is the empty/NaN team of the opening row - confirm.
    teams = pbp_df['PLAYER1_TEAM_ABBREVIATION'].unique()[1:]

    team_frames = []
    index = 1
    for t in teams:
        team_df = get_team_df(pbp_df, t)
        team_lineups = get_game_lineups_for_team(team_df)
        team_game_player_stints_df = get_game_player_stints_for_team(
            team_lineups)
        team_rotation_df = transform_stints_for_viz(team_game_player_stints_df)

        # Order this team's players by total 'time', largest first.
        players = team_game_player_stints_df['player'].unique()
        players = sorted(
            players,
            key=lambda x: -team_game_player_stints_df[
                team_game_player_stints_df['player'] == x]['time'].sum())

        team_rotation_df['pindex'] = 0
        for player in players:
            # .loc writes into the frame itself; the chained form
            # `df.pindex[cond] = ...` may silently write to a copy.
            team_rotation_df.loc[team_rotation_df.player == player,
                                 'pindex'] = index
            index += 1

        # Leave one unused pindex value between consecutive teams.
        index += 1

        team_frames.append(team_rotation_df)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    rotation_df = pd.concat(team_frames) if team_frames else pd.DataFrame()

    file_check(single_game_file_path)
    rotation_df.to_csv(single_game_file_path + 'data.csv')

    score_df = get_score_data_for_game(game_id)
    score_df.to_csv(single_game_file_path + 'score.csv')
Esempio n. 6
0
def get_viz_data_for_team_season(team_abbreviation):
    """Create the rotation-visualisation CSV for ``team_abbreviation``.

    Steps: fetch the team game log (``LastNGames='3'``), filter it to the
    given team, fetch play-by-play per game, derive player stints, transform
    them for the visualisation, assign each player a ``pindex`` ordered by
    total ``time`` and write the result to ``season_file_path``.

    Uses the module-level names ``season`` and ``season_file_path``.

    Args:
        team_abbreviation: Compared with the log's ``TEAM_ABBREVIATION``
            column and passed on to ``get_game_player_stints_for_team``.
    """
    log = TeamAdvancedGameLogs().get_data({
        'Season': season,
        'LastNGames': '3'
    },
                                          override_file=True)
    log = log[log['TEAM_ABBREVIATION'] == team_abbreviation]

    pbp_ep = PlayByPlay()

    stint_frames = []
    games = log.GAME_ID.tolist()
    for game in games:
        game = str(game)
        if len(game) < 10:
            game = '00' + game

        pbp_df = pbp_ep.get_data({'Season': season, 'GameID': game})
        pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])

        stint_frames.append(
            get_game_player_stints_for_team(pbp_df, team_abbreviation))

    # DataFrame.append is gone in pandas 2.0; build the season frame with a
    # single concat (an empty frame when there were no games, as before).
    season_player_stints_df = (pd.concat(stint_frames)
                               if stint_frames else pd.DataFrame())

    rotation_data = transform_stints_for_viz(season_player_stints_df)

    players = season_player_stints_df['player'].unique()
    players = sorted(players,
                     key=lambda x: -season_player_stints_df[
                         season_player_stints_df['player'] == x]['time'].sum())

    rotation_data['pindex'] = 0
    for index, player in enumerate(players, start=1):
        sys.stdout.write("\"" + player + "\",")
        cond = rotation_data.player == player
        # Single-step .loc assignment instead of chained indexing, which can
        # end up modifying a temporary copy.
        rotation_data.loc[cond, 'pindex'] = index

    file_check(season_file_path)
    rotation_data.to_csv(season_file_path)
Esempio n. 7
0
    def get_data(self, passed_params=default_params, override_file=False):
        """Return the merged on-court / off-court table for the given params.

        When the cache file is missing (per ``file_check``) or
        ``override_file`` is true, the endpoint is POSTed, result sets 1 and 2
        of the JSON response are turned into an "on" and an "off" frame, the
        two are merged on the shared team / player columns, and the merge is
        written to the cache file. Otherwise the cached CSV is read back.

        Args:
            passed_params: Request parameters; validated by ``check_params``
                and completed by ``self.set_params``.
            override_file: Re-download even if a cached file exists.

        Returns:
            pandas.DataFrame: one row per team/player key present in both
            result sets, with ``*_ON`` and ``*_OFF`` columns.

        Raises:
            ConnectionError: if the response status is not 200.
        """
        check_params(passed_params)
        params = self.set_params(passed_params)

        file_path = self.determine_file_path(params)

        if (not file_check(file_path)) or override_file:
            r = requests.post(self.base_url,
                              data=params,
                              headers=request_headers)

            print(
                str(r.status_code) + ': ' +
                construct_full_url(self.base_url, params))

            # Fail with a clear error instead of a JSON/KeyError further down,
            # matching the other endpoint get_data implementations.
            if r.status_code != 200:
                raise ConnectionError(
                    str(r.status_code) + ': ' + str(r.reason))

            # Decode the body once; both result sets come from the same payload.
            result_sets = r.json()['resultSets']

            key_cols = [
                'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'VS_PLAYER_ID',
                'VS_PLAYER_NAME'
            ]
            stat_cols = [
                'COURT_STATUS', 'GP', 'MIN', 'PLUS_MINUS', 'OFF_RATING',
                'DEF_RATING', 'NET_RATING'
            ]

            def build_frame(result_set, suffix):
                # Column layout: GROUP_SET_<suffix>, the shared key columns,
                # then every stat column tagged with _<suffix>.
                headers = (['GROUP_SET_' + suffix] + key_cols +
                           [col + '_' + suffix for col in stat_cols])
                return pd.DataFrame(
                    [dict(zip(headers, row)) for row in result_set['rowSet']])

            on_df = build_frame(result_sets[1], 'ON')
            off_df = build_frame(result_sets[2], 'OFF')

            df = pd.merge(on_df, off_df, on=key_cols)

            df.to_csv(file_path)

            return df

        else:
            print(file_path)
            return pd.read_csv(file_path)
Esempio n. 8
0
def get_rotation_data_for_game(game_id, year='2017-18', single_game_file_path='./single_game/'):
    """Generate ``data.csv`` (rotations) and ``score.csv`` for one game.

    The play-by-play of ``game_id`` is fetched, player stints are computed per
    team, each player receives a ``pindex`` (ordered by total ``time`` within
    the team, numbered consecutively across teams with a gap of one between
    teams) and both output files are written below ``single_game_file_path``.

    Args:
        game_id: Game identifier; stringified and prefixed with ``'00'`` when
            shorter than 10 characters.
        year: Season string sent to the play-by-play endpoint as ``Season``.
        single_game_file_path: Path prefix the output file names are appended to.
    """
    pbp_ep = PlayByPlay()

    game_id = str(game_id)
    if len(game_id) < 10:
        game_id = '00' + game_id

    pbp_df = pbp_ep.get_data({'Season': year, 'GameID': game_id})
    pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])

    # Drops the first unique value.
    # NOTE(review): looks like it skips a missing team on the first row - confirm.
    teams = pbp_df['PLAYER1_TEAM_ABBREVIATION'].unique()[1:]

    per_team_rotations = []
    index = 1
    for t in teams:
        team_df = get_team_df(pbp_df, t)
        team_lineups = get_game_lineups_for_team(team_df)
        team_game_player_stints_df = get_game_player_stints_for_team(team_lineups)
        team_rotation_df = transform_stints_for_viz(team_game_player_stints_df)

        players = team_game_player_stints_df['player'].unique()
        players = sorted(players,
                         key=lambda x: -team_game_player_stints_df[team_game_player_stints_df['player'] == x][
                             'time'].sum())

        team_rotation_df['pindex'] = 0
        for player in players:
            cond = team_rotation_df.player == player
            # One-step .loc assignment; chained `df.pindex[cond] = ...` can
            # write to a temporary copy and leave the frame unchanged.
            team_rotation_df.loc[cond, 'pindex'] = index
            index += 1

        # Skip one index value so the teams are visually separated.
        index += 1

        per_team_rotations.append(team_rotation_df)

    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    rotation_df = pd.concat(per_team_rotations) if per_team_rotations else pd.DataFrame()

    file_check(single_game_file_path)
    rotation_df.to_csv(single_game_file_path + 'data.csv')

    score_df = get_score_data_for_game(game_id)
    score_df.to_csv(single_game_file_path + 'score.csv')
Esempio n. 9
0
def merge_shot_pbp_for_game(pbp_df, game_id, season, season_type='Regular Season', merge_type='inner',
                            override_file=False):
    """Merge a game's play-by-play frame with its shot-chart data.

    The merge is cached per game under
    ``data_dir + 'merged_shot_pbp/' + season + '/'``. The cached CSV is
    returned when it exists (per ``file_check``) and ``override_file`` is false.

    Args:
        pbp_df: Play-by-play frame; must provide ``EVENTNUM`` and ``PERIOD``.
        game_id: Game id string; used in the cache path and sent as ``GameID``.
        season: Season string; used in the cache path and sent as ``Season``.
        season_type: Sent as ``SeasonType`` to the shot endpoint.
        merge_type: ``how`` argument handed to ``pd.merge``.
        override_file: Rebuild the cache; also forwarded to the shot endpoint.

    Returns:
        pandas.DataFrame: the merged frame, or the cached CSV contents.
    """
    file_path = data_dir + 'merged_shot_pbp/' + season + '/' + game_id + '.csv'

    # Serve from cache unless a rebuild was requested.
    if not override_file and file_check(file_path):
        return pd.read_csv(file_path)

    shot_params = {'Season': season, 'SeasonType': season_type, 'GameID': game_id}
    shots_df = ShotChartDetail().get_data(shot_params, override_file=override_file)

    merge_df = pd.merge(
        pbp_df,
        shots_df,
        left_on=['EVENTNUM', 'PERIOD'],
        right_on=['GAME_EVENT_ID', 'PERIOD'],
        how=merge_type,
    )
    merge_df.to_csv(file_path)
    return merge_df
Esempio n. 10
0
    def get_data(self, passed_params=default_params, override_file=False):
        """Fetch the endpoint's ``results`` as a DataFrame, using a CSV cache.

        Args:
            passed_params: Request parameters; validated by ``check_params``
                and completed by ``self.set_params``.
            override_file: Re-download even when the cache file exists.

        Returns:
            pandas.DataFrame: built from the ``results`` entry of the JSON
            response, or read from the cached CSV.

        Raises:
            ConnectionError: if the response status is not 200.
        """
        check_params(passed_params)
        params = self.set_params(passed_params)
        file_path = self.determine_file_path(params)

        # Cache hit: just report the file being used and load it.
        if file_check(file_path) and not override_file:
            print(file_path)
            return pd.read_csv(file_path)

        full_url = construct_full_url(self.base_url, params)
        print(full_url)
        response = requests.get(full_url, headers=self.synergy_request_headers)
        if response.status_code != 200:
            raise ConnectionError('{}:{}'.format(response.status_code, response.reason))

        df = pd.DataFrame(response.json()['results'])
        df.to_csv(file_path)
        return df
Esempio n. 11
0
    def get_data(self, passed_params=default_params, override_file=False):
        """POST the request and return the response as a DataFrame, with CSV caching.

        Args:
            passed_params: Request parameters; validated by ``check_params``
                and completed by ``self.set_params``.
            override_file: Re-download even when the cache file exists.

        Returns:
            pandas.DataFrame: ``json_to_pandas`` applied to the JSON response
            with ``self.index``, or the cached CSV contents.

        Raises:
            ConnectionError: if the response status is not 200.
        """
        check_params(passed_params)
        params = self.set_params(passed_params)
        file_path = self.determine_file_path(params)

        # Cache hit: report the file being used and load it.
        if file_check(file_path) and not override_file:
            print(file_path)
            return pd.read_csv(file_path)

        print(construct_full_url(self.base_url, params))
        response = requests.post(self.base_url, data=params, headers=request_headers)
        if response.status_code != 200:
            message = str(response.status_code) + ': ' + str(response.reason)
            raise ConnectionError(message)

        df = json_to_pandas(response.json(), self.index)
        df.to_csv(file_path)
        return df
Esempio n. 12
0
def get_tov_pbp_data(override_file=False):
    """Return all play-by-play rows with ``EVENTMSGTYPE == 5`` for the season.

    Uses the module-level names ``season`` and ``file_path``. The result is
    cached at ``file_path`` and read back from there unless ``override_file``
    is true or ``file_check`` reports the file as missing.

    Args:
        override_file: Rebuild the cached CSV even if it exists.

    Returns:
        pandas.DataFrame: the filtered rows of every game in the team game
        log with missing values replaced by ``''`` (or the cached CSV
        contents). Judging by the function name, type 5 presumably marks
        turnovers.
    """
    if not override_file and file_check(file_path):
        return pd.read_csv(file_path)

    pbp_ep = PlayByPlay()
    log_ep = TeamAdvancedGameLogs()

    log_df = log_ep.get_data({'Season': season}, override_file=False)

    game_frames = []
    for g in log_df['GAME_ID'].unique():
        g = str(g)
        if len(g) < 10:
            g = '00' + g
        game_df = pbp_ep.get_data({'Season': season, 'GameID': g})
        game_frames.append(game_df[game_df['EVENTMSGTYPE'] == 5])

    # DataFrame.append was removed in pandas 2.0; concatenate once. An empty
    # game list still yields an empty frame, as before.
    season_pbp_df = pd.concat(game_frames) if game_frames else pd.DataFrame()

    season_pbp_df = season_pbp_df.fillna('')
    season_pbp_df.to_csv(file_path)
    return season_pbp_df
Esempio n. 13
0
def get_tov_pbp_data(override_file=False):
    """Collect the season's play-by-play events of message type 5.

    Reads the module-level ``season`` and ``file_path``. When the cache file
    exists (per ``file_check``) and ``override_file`` is false, the cached CSV
    is returned. Otherwise every game id in the team game log is fetched,
    filtered to ``EVENTMSGTYPE == 5``, combined, NaN-filled with ``''`` and
    written to ``file_path``.

    Args:
        override_file: Force regeneration of the cached CSV.

    Returns:
        pandas.DataFrame: the combined filtered events, or the cached CSV.
    """
    if override_file or not file_check(file_path):
        pbp_ep = PlayByPlay()
        log_ep = TeamAdvancedGameLogs()

        log_df = log_ep.get_data({'Season': season}, override_file=False)

        # Gather per-game frames in a list; DataFrame.append does not exist in
        # pandas 2.0 and re-copied the accumulated frame on each iteration.
        filtered_games = []
        for g in log_df['GAME_ID'].unique():
            g = str(g)
            if len(g) < 10:
                g = '00' + g
            game_df = pbp_ep.get_data({'Season': season, 'GameID': g})
            game_df = game_df[game_df['EVENTMSGTYPE'] == 5]
            filtered_games.append(game_df)

        if filtered_games:
            season_pbp_df = pd.concat(filtered_games)
        else:
            season_pbp_df = pd.DataFrame()

        season_pbp_df = season_pbp_df.fillna('')
        season_pbp_df.to_csv(file_path)
        return season_pbp_df
    else:
        return pd.read_csv(file_path)
Esempio n. 14
0
def merge_shot_pbp_for_game(season, game_id, season_type='Regular Season', override_file=False):
    """Left-merge one game's play-by-play with its shot-chart data.

    The merge is cached at
    ``data_dir + 'merged_shot_pbp/' + season + '/' + game_id + '.csv'`` and
    read back from there unless ``override_file`` is true or ``file_check``
    reports the file as missing.

    Args:
        season: Season string; used in the cache path and sent as ``Season``.
        game_id: Game id string; used in the cache path and sent as ``GameID``.
        season_type: Sent as ``SeasonType`` to both endpoints.
        override_file: Rebuild the cache; also forwarded to the shot endpoint.

    Returns:
        pandas.DataFrame: every play-by-play row, with shot columns filled
        where ``EVENTNUM``/``GAME_EVENT_ID``, ``GAME_ID`` and ``PERIOD`` match
        (or the cached CSV contents).
    """
    file_path = data_dir + 'merged_shot_pbp/' + season + '/' + game_id + '.csv'

    if override_file or not file_check(file_path):
        play_by_play_endpoint = PlayByPlay()
        shot_endpoint = ShotChartDetail()

        pbp_df = play_by_play_endpoint.get_data({'GameID': game_id, 'Season': season, 'SeasonType': season_type})

        # Pad to 10 characters rather than always prefixing '00': an id that
        # still has its leading zeros would otherwise become 12 characters and
        # never match a shot row, leaving the left merge with empty shot columns.
        pbp_df['GAME_ID'] = pbp_df['GAME_ID'].astype(str).str.zfill(10)

        shots_df = shot_endpoint.get_data({'Season': season, 'SeasonType': season_type, 'GameID': game_id},
                                          override_file=override_file)

        merge_df = pd.merge(pbp_df, shots_df, left_on=['EVENTNUM', 'GAME_ID', 'PERIOD'],
                            right_on=['GAME_EVENT_ID', 'GAME_ID', 'PERIOD'], how='left')

        merge_df.to_csv(file_path)
        return merge_df
    else:
        return pd.read_csv(file_path)
Esempio n. 15
0
    def aggregate_data(self,
                       season='2017-18',
                       season_type='Regular Season',
                       override_file=False):
        """Sum matchup stats per offensive/defensive player pair over a season.

        The aggregate is cached at
        ``data_dir + '/boxscorematchups/aggregate_<season>.csv'`` and read back
        unless the file is missing (per ``file_check``) or ``override_file``
        is true.

        Args:
            season: Season string; sent to the game-log endpoint and used in
                the cache file name.
            season_type: Sent to the game-log endpoint as ``SeasonType``.
            override_file: Rebuild the cached aggregate.

        Returns:
            pandas.DataFrame: one row per (``OFF_PLAYER_NAME``,
            ``DEF_PLAYER_NAME``) pair with summed stat columns, limited to
            pairs with ``POSS >= 10`` (or the cached CSV contents).
        """
        file_path = data_dir + '/boxscorematchups/aggregate_{}.csv'.format(
            season)

        if file_check(file_path) and not override_file:
            return pd.read_csv(file_path)

        log_params = {'Season': season, 'SeasonType': season_type}
        log = TeamAdvancedGameLogs().get_data(log_params, override_file=True)

        # Normalise ids to strings, prefixing '00' when shorter than 10 chars.
        games = []
        for raw_id in log.GAME_ID.unique():
            game_id = str(raw_id)
            if len(game_id) < 10:
                game_id = '00' + game_id
            games.append(game_id)

        # NOTE(review): only GameID is passed here; season/season_type are not
        # forwarded to self.get_data - confirm that is intended.
        per_game_frames = [self.get_data({'GameID': g}) for g in games]
        season_df = pd.concat(per_game_frames)

        group_col = ['OFF_PLAYER_NAME', 'DEF_PLAYER_NAME']
        sum_col = [
            'AST', 'BLK', 'DEF_FOULS', 'FG3A', 'FG3M', 'FGA', 'FGM', 'FTM',
            'HELP_BLK', 'HELP_BLK_REC', 'OFF_FOULS', 'PLAYER_PTS', 'POSS',
            'SFL', 'TEAM_PTS', 'TOV'
        ]

        totals = season_df.groupby(group_col)[sum_col].sum().reset_index()
        df = totals[totals['POSS'] >= 10]
        df.to_csv(file_path)
        return df
Esempio n. 16
0
def get_rpm():
    """Return the ESPN RPM table, cached in one CSV per calendar day.

    If ``file_check`` finds today's file it is loaded with ``pd.read_csv``.
    Otherwise pages 1 through 11 of the RPM listing are requested; the first
    ``<tr>`` of page 1 provides the column names and every later ``<tr>`` of
    each page provides one data row.

    Returns:
        pandas.DataFrame: the scraped table with ``NAME`` reduced to the text
        before the first comma, or the cached CSV contents.
    """
    file_path = data_dir + 'RPM/' + str(datetime.date.today()) + '.csv'

    if file_check(file_path):
        return pd.read_csv(file_path)

    rpm_url = 'http://www.espn.com/nba/statistics/rpm/_/page/{page}/sort/RPM'
    headers = []
    data = []
    for p in range(1, 12):
        page = requests.post(rpm_url.format(page=p))
        soup = BeautifulSoup(page.content)
        all_rows = soup.find_all('tr')
        if p == 1:
            headers = [cell.getText() for cell in all_rows[0].find_all('td')]
        data.extend([cell.getText() for cell in row.find_all('td')] for row in all_rows[1:])

    df = pd.DataFrame(data, columns=headers)
    # Keep only the part of NAME before the first comma.
    df.NAME = df.NAME.apply(lambda full: full.split(',')[0])
    df.to_csv(file_path)
    return df
Esempio n. 17
0
def get_season_schedule(season='2017-18', override_file=False):
    """Return the season schedule scraped from basketball-reference.

    The schedule is cached at ``data_dir + season + '_schedule.csv'`` and read
    back unless the file is missing (per ``file_check``) or ``override_file``
    is true. Otherwise the monthly schedule pages from October through April
    are requested and their table rows combined.

    Args:
        season: Season string whose first four characters are the starting
            year, e.g. ``'2017-18'``. It selects both the cache file and the
            pages that are scraped.
        override_file: Re-scrape even when the cache file exists.

    Returns:
        pandas.DataFrame: one row per table row, with ``Home/Neutral`` and
        ``Visitor/Neutral`` renamed to ``Home`` and ``Visitor`` and missing
        cells replaced by 0 (freshly scraped case), or the cached CSV contents.

    Raises:
        ValueError: if ``season`` does not start with a four-digit year.
    """
    file_path = data_dir + season + '_schedule.csv'

    if (not file_check(file_path)) or override_file:
        # The page name uses the starting year + 1 ('2017-18' -> NBA_2018).
        # The URL used to hard-code NBA_2018, so any other season silently
        # cached 2017-18 data under the wrong file name.
        league_year = int(season[:4]) + 1
        base_url = 'https://www.basketball-reference.com/leagues/NBA_{year}_games-{month}.html'
        months = [
            'october', 'november', 'december', 'january', 'february', 'march',
            'april'
        ]

        schedule_data = []
        for month in months:
            r = requests.post(base_url.format(year=league_year, month=month))
            html = r.content
            soup = BeautifulSoup(html)
            table_rows = soup.find_all('tr')
            if month == months[0]:
                # Column names come from the header row of the first month.
                headers = [th.getText() for th in table_rows[0].find_all('th')]
            schedule_data.extend(
                [cell.getText() for cell in row.find_all(['th', 'td'])]
                for row in table_rows[1:])

        schedule_df = pd.DataFrame(schedule_data, columns=headers)
        # fillna returns a new frame; the result used to be discarded, which
        # made the call a no-op.
        schedule_df = schedule_df.fillna(0)

        schedule_df.rename(columns={
            'Home/Neutral': 'Home',
            'Visitor/Neutral': 'Visitor'
        },
                           inplace=True)
        schedule_df.to_csv(file_path)
        return schedule_df
    else:
        return pd.read_csv(file_path)