def get_viz_data_for_team_season(team_abbreviation):
    """Build and save the rotation-visualisation table for one team.

    Fetches the team game log (requested with ``LastNGames`` set to ``'3'``),
    derives player stints from each game's play-by-play, numbers the players
    by descending total stint time and writes the result to
    ``season_file_path``.

    Reads the module-level names ``season`` and ``season_file_path``.

    :param team_abbreviation: value matched against the log's
        ``TEAM_ABBREVIATION`` column and forwarded to the stint helper.
    :return: None; the table is written to CSV and each player name is
        echoed to stdout in ranking order.
    """
    log = TeamAdvancedGameLogs().get_data(
        {'Season': season, 'LastNGames': '3'}, override_file=True)
    log = log[log['TEAM_ABBREVIATION'] == team_abbreviation]

    pbp_ep = PlayByPlay()
    per_game_stints = []
    for game in log.GAME_ID.tolist():
        game = str(game)
        # Ids shorter than 10 characters get the '00' prefix put back.
        if len(game) < 10:
            game = '00' + game
        pbp_df = pbp_ep.get_data({'Season': season, 'GameID': game})
        pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])
        per_game_stints.append(
            get_game_player_stints_for_team(pbp_df, team_abbreviation))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # The empty-frame fallback mirrors what the append loop produced when
    # the log had no games.
    if per_game_stints:
        season_player_stints_df = pd.concat(per_game_stints)
    else:
        season_player_stints_df = pd.DataFrame()

    rotation_data = transform_stints_for_viz(season_player_stints_df)

    players = season_player_stints_df['player'].unique()
    # Negated key: largest total 'time' first, ties keep first-seen order.
    players = sorted(
        players,
        key=lambda x: -season_player_stints_df[
            season_player_stints_df['player'] == x]['time'].sum())

    rotation_data['pindex'] = 0
    for index, player in enumerate(players, start=1):
        sys.stdout.write("\"" + player + "\",")
        cond = rotation_data.player == player
        # .loc writes into the frame itself; the previous chained form
        # (rotation_data.pindex[cond] = ...) may assign to a temporary.
        rotation_data.loc[cond, 'pindex'] = index

    file_check(season_file_path)
    rotation_data.to_csv(season_file_path)
def get_rpm():
    """Return the ESPN RPM table, using a per-day CSV cache.

    The cache file is named after today's date under ``data_dir + 'RPM/'``.
    When ``file_check`` reports it as present the CSV is read back;
    otherwise pages 1-11 of the ESPN listing are fetched, parsed and saved.

    :return: DataFrame with the scraped columns plus ``POS``; ``NAME`` is
        trimmed to the text before the first comma and ``RPM``/``MPG``/``GP``
        are converted to numeric types.
    """
    file_path = data_dir + 'RPM/' + str(datetime.date.today()) + '.csv'
    if file_check(file_path):
        return pd.read_csv(file_path)

    rpm_url = 'http://www.espn.com/nba/statistics/rpm/_/page/{page}/sort/RPM'
    headers = []
    data = []
    for page in range(1, 12):
        response = requests.post(rpm_url.format(page=page))
        soup = BeautifulSoup(response.content)
        table_rows = soup.find_all('tr')
        if page == 1:
            # Column names come from the first row of the first page only.
            headers = [cell.getText() for cell in table_rows[0].find_all('td')]
        for row in table_rows[1:]:
            data.append([cell.getText() for cell in row.find_all('td')])

    df = pd.DataFrame(data, columns=headers)

    # NAME is split on ','; the second piece becomes POS, the first stays.
    raw_names = df.NAME
    df['POS'] = raw_names.apply(lambda full: full.split(',')[1])
    df.NAME = raw_names.apply(lambda full: full.split(',')[0])

    df.RPM = df.RPM.astype(float)
    df.MPG = df.MPG.astype(float)
    df.GP = df.GP.astype(int)

    df.to_csv(file_path)
    return df
def merge_shot_pbp_for_season(season, season_type='Regular Season', override_file=False):
    """Merge a season's play-by-play with its shot-chart rows and cache it.

    :param season: season string used in endpoint parameters and in the
        cache file name (``data_dir + 'merged_shot_pbp/' + season + '.csv'``).
    :param season_type: forwarded as ``SeasonType`` to every endpoint.
    :param override_file: when true, rebuild even if ``file_check`` would
        report a cached file (``file_check`` is not consulted in that case).
    :return: the merged DataFrame, freshly built or read from the cache.
    """
    file_path = data_dir + 'merged_shot_pbp/' + season + '.csv'
    if not override_file and file_check(file_path):
        return pd.read_csv(file_path)

    play_by_play_endpoint = PlayByPlay()
    shot_endpoint = ShotChartDetail()
    log = TeamAdvancedGameLogs().get_data(
        {'Season': season, 'SeasonType': season_type}, override_file=True)

    game_frames = []
    for g in log.GAME_ID.unique():
        # Ids shorter than 10 characters get the '00' prefix put back.
        if len(str(g)) < 10:
            g = '00' + str(g)
        game_frames.append(play_by_play_endpoint.get_data(
            {'GameID': g, 'Season': season, 'SeasonType': season_type}))

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration; a single concat avoids both problems. The
    # empty-frame fallback mirrors the old result for a log with no games.
    pbp_df = pd.concat(game_frames) if game_frames else pd.DataFrame()

    # NOTE(review): the prefix is applied unconditionally here, presumably
    # because GAME_ID comes back without its leading zeros -- confirm.
    pbp_df['GAME_ID'] = '00' + pbp_df['GAME_ID'].astype(str)

    shots_df = shot_endpoint.get_data(
        {'Season': season, 'SeasonType': season_type}, override_file=True)
    merge_df = pd.merge(pbp_df, shots_df,
                        left_on=['EVENTNUM', 'GAME_ID', 'PERIOD'],
                        right_on=['GAME_EVENT_ID', 'GAME_ID', 'PERIOD'])
    merge_df.to_csv(file_path)
    return merge_df
def get_rotation_data_for_game(game_id, year='2017-18', single_game_file_path='./single_game/'):
    """Write rotation and score CSVs for a single game.

    For every team found in the play-by-play, player stints are derived and
    transformed for the visualisation, and players are numbered by
    descending total stint time. The numbering continues across teams with
    one value skipped between them.

    :param game_id: game identifier; converted to ``str`` and prefixed with
        ``'00'`` when shorter than 10 characters.
    :param year: season string sent as ``Season`` to the play-by-play
        endpoint.
    :param single_game_file_path: directory prefix; ``data.csv`` and
        ``score.csv`` are written under it.
    :return: None.
    """
    pbp_ep = PlayByPlay()
    game_id = str(game_id)
    if len(game_id) < 10:
        game_id = '00' + game_id

    pbp_df = pbp_ep.get_data({'Season': year, 'GameID': game_id})
    pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])

    # The first unique value is dropped -- presumably the missing-team
    # marker on events without a player; TODO confirm.
    teams = pbp_df['PLAYER1_TEAM_ABBREVIATION'].unique()[1:]

    team_frames = []
    index = 1
    for t in teams:
        team_df = get_team_df(pbp_df, t)
        team_lineups = get_game_lineups_for_team(team_df)
        team_game_player_stints_df = get_game_player_stints_for_team(
            team_lineups)
        team_rotation_df = transform_stints_for_viz(team_game_player_stints_df)

        players = team_game_player_stints_df['player'].unique()
        # Negated key: largest total 'time' first, ties keep first-seen order.
        players = sorted(
            players,
            key=lambda x: -team_game_player_stints_df[
                team_game_player_stints_df['player'] == x]['time'].sum())

        team_rotation_df['pindex'] = 0
        for player in players:
            cond = team_rotation_df.player == player
            # .loc writes into the frame itself; the previous chained form
            # (df.pindex[cond] = ...) may assign to a temporary.
            team_rotation_df.loc[cond, 'pindex'] = index
            index += 1
        # Skip one index value before the next team's players.
        index += 1
        team_frames.append(team_rotation_df)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    rotation_df = pd.concat(team_frames) if team_frames else pd.DataFrame()

    file_check(single_game_file_path)
    rotation_df.to_csv(single_game_file_path + 'data.csv')
    score_df = get_score_data_for_game(game_id)
    score_df.to_csv(single_game_file_path + 'score.csv')
def get_viz_data_for_team_season(team_abbreviation):
    """Build and save the rotation-visualisation table for one team.

    Fetches the team game log (requested with ``LastNGames`` set to ``'3'``),
    derives player stints from each game's play-by-play, numbers the players
    by descending total stint time and writes the result to
    ``season_file_path``.

    Reads the module-level names ``season`` and ``season_file_path``.

    :param team_abbreviation: value matched against the log's
        ``TEAM_ABBREVIATION`` column and forwarded to the stint helper.
    :return: None; the table is written to CSV and each player name is
        echoed to stdout in ranking order.
    """
    log = TeamAdvancedGameLogs().get_data({
        'Season': season,
        'LastNGames': '3'
    }, override_file=True)
    log = log[log['TEAM_ABBREVIATION'] == team_abbreviation]

    pbp_ep = PlayByPlay()
    stint_frames = []
    for game in log.GAME_ID.tolist():
        game = str(game)
        # Ids shorter than 10 characters get the '00' prefix put back.
        if len(game) < 10:
            game = '00' + game
        pbp_df = pbp_ep.get_data({'Season': season, 'GameID': game})
        pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])
        game_stints_df = get_game_player_stints_for_team(
            pbp_df, team_abbreviation)
        stint_frames.append(game_stints_df)

    # DataFrame.append no longer exists in pandas >= 2.0. One concat replaces
    # the per-game append; an empty DataFrame is kept as the no-games result.
    season_player_stints_df = (
        pd.concat(stint_frames) if stint_frames else pd.DataFrame())

    rotation_data = transform_stints_for_viz(season_player_stints_df)

    players = season_player_stints_df['player'].unique()
    # Sort by total 'time' descending (negated key keeps ties stable).
    players = sorted(players, key=lambda x: -season_player_stints_df[
        season_player_stints_df['player'] == x]['time'].sum())

    index = 1
    rotation_data['pindex'] = 0
    for player in players:
        sys.stdout.write("\"" + player + "\",")
        cond = rotation_data.player == player
        # Assign through .loc so the write reaches rotation_data itself
        # rather than a possibly temporary column object.
        rotation_data.loc[cond, 'pindex'] = index
        index += 1

    file_check(season_file_path)
    rotation_data.to_csv(season_file_path)
def get_data(self, passed_params=default_params, override_file=False):
    """Fetch on/off-court rows, merge them side by side and cache as CSV.

    Result sets 1 and 2 of the response are turned into an "on" and an
    "off" frame and inner-joined on the team / vs-player identity columns.

    :param passed_params: endpoint parameters; validated by ``check_params``
        and completed by ``self.set_params``.
    :param override_file: when true, request fresh data even if
        ``file_check`` reports a cached file.
    :return: merged DataFrame, freshly built or read from the cache.
    :raises ConnectionError: when the response status is not 200, matching
        the other ``get_data`` implementations in this file.
    """
    check_params(passed_params)
    params = self.set_params(passed_params)
    file_path = self.determine_file_path(params)

    cached = file_check(file_path)
    if cached and not override_file:
        print(file_path)
        return pd.read_csv(file_path)

    r = requests.post(self.base_url, data=params, headers=request_headers)
    print(
        str(r.status_code) + ': ' + construct_full_url(self.base_url, params))
    # Fail with a clear error instead of an opaque JSON/key error further on.
    if r.status_code != 200:
        raise ConnectionError(str(r.status_code) + ': ' + str(r.reason))

    # Parse the body once; both result sets come from the same payload.
    result_sets = r.json()['resultSets']

    key_columns = [
        'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'VS_PLAYER_ID',
        'VS_PLAYER_NAME'
    ]

    def build_frame(result_set, suffix):
        # Rows are positional; names are hard-coded with an _ON/_OFF suffix
        # on every non-key column so the merge below produces no clashes.
        headers = ['GROUP_SET_' + suffix] + key_columns + [
            'COURT_STATUS_' + suffix, 'GP_' + suffix, 'MIN_' + suffix,
            'PLUS_MINUS_' + suffix, 'OFF_RATING_' + suffix,
            'DEF_RATING_' + suffix, 'NET_RATING_' + suffix
        ]
        records = [dict(zip(headers, row)) for row in result_set['rowSet']]
        return pd.DataFrame(records)

    on_df = build_frame(result_sets[1], 'ON')
    off_df = build_frame(result_sets[2], 'OFF')

    df = pd.merge(on_df, off_df, on=key_columns)
    df.to_csv(file_path)
    return df
def get_rotation_data_for_game(game_id, year='2017-18', single_game_file_path='./single_game/'):
    """Write rotation and score CSVs for a single game.

    Each team in the play-by-play gets its stints derived and transformed
    for the visualisation; players receive a running ``pindex`` ordered by
    descending total stint time, with one value skipped between teams.

    :param game_id: game identifier; converted to ``str`` and prefixed with
        ``'00'`` when shorter than 10 characters.
    :param year: season string sent as ``Season`` to the play-by-play
        endpoint.
    :param single_game_file_path: directory prefix under which ``data.csv``
        and ``score.csv`` are written.
    :return: None.
    """
    pbp_ep = PlayByPlay()
    game_id = str(game_id)
    if len(game_id) < 10:
        game_id = '00' + game_id

    pbp_df = pbp_ep.get_data({'Season': year, 'GameID': game_id})
    pbp_df['TIME'] = convert_time(pbp_df['PCTIMESTRING'], pbp_df['PERIOD'])

    # NOTE(review): the first unique value is skipped, presumably because it
    # is the missing-team marker -- confirm against the play-by-play data.
    teams = pbp_df['PLAYER1_TEAM_ABBREVIATION'].unique()[1:]

    rotation_frames = []
    index = 1
    for t in teams:
        team_df = get_team_df(pbp_df, t)
        team_lineups = get_game_lineups_for_team(team_df)
        team_game_player_stints_df = get_game_player_stints_for_team(team_lineups)
        team_rotation_df = transform_stints_for_viz(team_game_player_stints_df)

        players = team_game_player_stints_df['player'].unique()
        # Sort by total 'time' descending (negated key keeps ties stable).
        players = sorted(
            players,
            key=lambda x: -team_game_player_stints_df[
                team_game_player_stints_df['player'] == x]['time'].sum())

        team_rotation_df['pindex'] = 0
        for player in players:
            cond = team_rotation_df.player == player
            # Assign through .loc: the chained df.pindex[cond] = ... form is
            # not guaranteed to modify the frame.
            team_rotation_df.loc[cond, 'pindex'] = index
            index += 1
        # One unused index value separates consecutive teams.
        index += 1
        rotation_frames.append(team_rotation_df)

    # DataFrame.append no longer exists in pandas >= 2.0; concat once and
    # keep an empty frame as the result when no team was found.
    rotation_df = (
        pd.concat(rotation_frames) if rotation_frames else pd.DataFrame())

    file_check(single_game_file_path)
    rotation_df.to_csv(single_game_file_path + 'data.csv')
    score_df = get_score_data_for_game(game_id)
    score_df.to_csv(single_game_file_path + 'score.csv')
def merge_shot_pbp_for_game(pbp_df, game_id, season, season_type='Regular Season', merge_type='inner',
                            override_file=False):
    """Join an already-loaded play-by-play frame with the game's shot chart.

    :param pbp_df: play-by-play DataFrame for the game.
    :param game_id: game id string, used in the cache path and sent as
        ``GameID`` to the shot-chart endpoint.
    :param season: season string, used in the cache path and request.
    :param season_type: forwarded as ``SeasonType``.
    :param merge_type: ``how`` argument for ``pd.merge``.
    :param override_file: rebuild the merge (and forward the flag to the
        shot-chart fetch) instead of consulting the cached CSV.
    :return: merged DataFrame, freshly built or read from the cache.
    """
    file_path = data_dir + 'merged_shot_pbp/' + season + '/' + game_id + '.csv'

    # file_check is consulted only when no override was requested.
    if not override_file and file_check(file_path):
        return pd.read_csv(file_path)

    shot_params = {
        'Season': season,
        'SeasonType': season_type,
        'GameID': game_id,
    }
    shots_df = ShotChartDetail().get_data(shot_params, override_file=override_file)

    # The play-by-play event number pairs with the shot chart's event id.
    pbp_keys = ['EVENTNUM', 'PERIOD']
    shot_keys = ['GAME_EVENT_ID', 'PERIOD']
    merge_df = pd.merge(pbp_df, shots_df, left_on=pbp_keys, right_on=shot_keys, how=merge_type)

    merge_df.to_csv(file_path)
    return merge_df
def get_data(self, passed_params=default_params, override_file=False):
    """Return this endpoint's data, from the CSV cache or a GET request.

    :param passed_params: endpoint parameters; validated by ``check_params``
        and completed by ``self.set_params``.
    :param override_file: when true, request fresh data even if
        ``file_check`` reports a cached file.
    :return: DataFrame built from the response's ``results`` entry, or read
        from the cached CSV.
    :raises ConnectionError: when the response status is not 200.
    """
    check_params(passed_params)
    params = self.set_params(passed_params)
    file_path = self.determine_file_path(params)

    cached = file_check(file_path)
    if cached and not override_file:
        print(file_path)
        return pd.read_csv(file_path)

    full_url = construct_full_url(self.base_url, params)
    print(full_url)
    response = requests.get(full_url, headers=self.synergy_request_headers)
    if response.status_code != 200:
        raise ConnectionError('{}:{}'.format(response.status_code, response.reason))

    payload = response.json()
    df = pd.DataFrame(payload['results'])
    df.to_csv(file_path)
    return df
def get_data(self, passed_params=default_params, override_file=False):
    """Return this endpoint's data, from the CSV cache or a POST request.

    :param passed_params: endpoint parameters; validated by ``check_params``
        and completed by ``self.set_params``.
    :param override_file: when true, request fresh data even if
        ``file_check`` reports a cached file.
    :return: DataFrame produced by ``json_to_pandas`` for ``self.index``,
        or read from the cached CSV.
    :raises ConnectionError: when the response status is not 200.
    """
    check_params(passed_params)
    params = self.set_params(passed_params)
    file_path = self.determine_file_path(params)

    cached = file_check(file_path)
    if cached and not override_file:
        print(file_path)
        return pd.read_csv(file_path)

    # The equivalent URL is printed for reference; the request itself sends
    # the parameters as POST data.
    print(construct_full_url(self.base_url, params))
    response = requests.post(self.base_url, data=params, headers=request_headers)
    if response.status_code != 200:
        raise ConnectionError(
            str(response.status_code) + ': ' + str(response.reason))

    df = json_to_pandas(response.json(), self.index)
    df.to_csv(file_path)
    return df
def get_tov_pbp_data(override_file=False):
    """Collect the ``EVENTMSGTYPE == 5`` play-by-play rows for every game.

    Going by the function name these are the turnover events. Reads the
    module-level names ``season`` and ``file_path``.

    :param override_file: when true, rebuild the data instead of consulting
        the cached CSV (``file_check`` is not called in that case).
    :return: DataFrame of the selected rows with missing values replaced by
        ``''``, or the cached CSV contents.
    """
    if not override_file and file_check(file_path):
        return pd.read_csv(file_path)

    pbp_ep = PlayByPlay()
    log_ep = TeamAdvancedGameLogs()
    log_df = log_ep.get_data({'Season': season}, override_file=False)

    selected_frames = []
    for g in log_df['GAME_ID'].unique():
        g = str(g)
        # Ids shorter than 10 characters get the '00' prefix put back.
        if len(g) < 10:
            g = '00' + g
        game_df = pbp_ep.get_data({'Season': season, 'GameID': g})
        selected_frames.append(game_df[game_df['EVENTMSGTYPE'] == 5])

    # DataFrame.append was removed in pandas 2.0 and re-copied the growing
    # frame for every game; concatenate once. The empty-frame fallback keeps
    # the previous result for a log with no games.
    if selected_frames:
        season_pbp_df = pd.concat(selected_frames)
    else:
        season_pbp_df = pd.DataFrame()

    season_pbp_df = season_pbp_df.fillna('')
    season_pbp_df.to_csv(file_path)
    return season_pbp_df
def merge_shot_pbp_for_game(season, game_id, season_type='Regular Season', override_file=False):
    """Fetch one game's play-by-play and left-join its shot-chart rows.

    :param season: season string, used in the cache path and requests.
    :param game_id: game id string, used in the cache path and sent as
        ``GameID`` to both endpoints.
    :param season_type: forwarded as ``SeasonType``.
    :param override_file: rebuild the merge (and forward the flag to the
        shot-chart fetch) instead of consulting the cached CSV.
    :return: merged DataFrame, freshly built or read from the cache.
    """
    file_path = data_dir + 'merged_shot_pbp/' + season + '/' + game_id + '.csv'

    # file_check is consulted only when no override was requested.
    if not override_file and file_check(file_path):
        return pd.read_csv(file_path)

    play_by_play_endpoint = PlayByPlay()
    shot_endpoint = ShotChartDetail()

    pbp_params = {'GameID': game_id, 'Season': season, 'SeasonType': season_type}
    pbp_df = play_by_play_endpoint.get_data(pbp_params)
    # NOTE(review): presumably restores the leading zeros so GAME_ID matches
    # the shot-chart frame -- confirm.
    pbp_df['GAME_ID'] = '00' + pbp_df['GAME_ID'].astype(str)

    shot_params = {'Season': season, 'SeasonType': season_type, 'GameID': game_id}
    shots_df = shot_endpoint.get_data(shot_params, override_file=override_file)

    pbp_keys = ['EVENTNUM', 'GAME_ID', 'PERIOD']
    shot_keys = ['GAME_EVENT_ID', 'GAME_ID', 'PERIOD']
    # Left join: every play-by-play row is kept, with or without a shot.
    merge_df = pd.merge(pbp_df, shots_df, left_on=pbp_keys, right_on=shot_keys, how='left')

    merge_df.to_csv(file_path)
    return merge_df
def aggregate_data(self, season='2017-18', season_type='Regular Season', override_file=False):
    """Sum per-game matchup rows into season totals per player pairing.

    :param season: season string, used for the game log request and the
        cache file name.
    :param season_type: forwarded as ``SeasonType`` to the game log request.
    :param override_file: when true, rebuild even if ``file_check`` reports
        a cached file.
    :return: DataFrame with one row per (``OFF_PLAYER_NAME``,
        ``DEF_PLAYER_NAME``) pair having ``POSS`` of at least 10, or the
        cached CSV contents.
    """
    file_path = data_dir + '/boxscorematchups/aggregate_{}.csv'.format(
        season)

    cached = file_check(file_path)
    if cached and not override_file:
        return pd.read_csv(file_path)

    log_params = {'Season': season, 'SeasonType': season_type}
    log = TeamAdvancedGameLogs().get_data(log_params, override_file=True)

    games = []
    for raw_id in log.GAME_ID.unique():
        game_id = str(raw_id)
        # Ids shorter than 10 characters get the '00' prefix put back.
        if len(game_id) < 10:
            game_id = '00' + game_id
        games.append(game_id)

    per_game_frames = [self.get_data({'GameID': game_id}) for game_id in games]
    season_df = pd.concat(per_game_frames)

    sum_col = [
        'AST', 'BLK', 'DEF_FOULS', 'FG3A', 'FG3M', 'FGA', 'FGM', 'FTM',
        'HELP_BLK', 'HELP_BLK_REC', 'OFF_FOULS', 'PLAYER_PTS', 'POSS',
        'SFL', 'TEAM_PTS', 'TOV'
    ]
    group_col = ['OFF_PLAYER_NAME', 'DEF_PLAYER_NAME']

    totals = season_df.groupby(group_col)[sum_col].sum().reset_index()
    # Only pairings with at least 10 summed POSS are kept.
    df = totals[totals['POSS'] >= 10]

    df.to_csv(file_path)
    return df
def get_rpm():
    """Return the ESPN RPM table, using a per-day CSV cache.

    The cache file is named after today's date under ``data_dir + 'RPM/'``.
    When ``file_check`` reports it as present the CSV is read back;
    otherwise pages 1-11 of the ESPN listing are fetched, parsed and saved.

    :return: DataFrame of the scraped table with ``NAME`` trimmed to the
        text before the first comma.
    """
    file_path = data_dir + 'RPM/' + str(datetime.date.today()) + '.csv'
    if file_check(file_path):
        return pd.read_csv(file_path)

    rpm_url = 'http://www.espn.com/nba/statistics/rpm/_/page/{page}/sort/RPM'
    headers = []
    data = []
    for page in range(1, 12):
        response = requests.post(rpm_url.format(page=page))
        soup = BeautifulSoup(response.content)
        table_rows = soup.find_all('tr')
        if page == 1:
            # Column names come from the first row of the first page only.
            headers = [cell.getText() for cell in table_rows[0].find_all('td')]
        for row in table_rows[1:]:
            data.append([cell.getText() for cell in row.find_all('td')])

    df = pd.DataFrame(data, columns=headers)
    # Keep only the part of NAME before the first comma.
    df.NAME = df.NAME.apply(lambda full: full.split(',')[0])

    df.to_csv(file_path)
    return df
def get_season_schedule(season='2017-18', override_file=False):
    """Scrape a season's monthly schedule pages and cache them as CSV.

    :param season: season string of the form ``'YYYY-YY'``; it names the
        cache file and selects the pages to scrape.
    :param override_file: when true, scrape again even if ``file_check``
        reports a cached file.
    :return: schedule DataFrame with ``Home/Neutral`` and
        ``Visitor/Neutral`` renamed to ``Home`` and ``Visitor``, or the
        cached CSV contents.
    :raises ValueError: when ``season`` does not start with a numeric year
        followed by ``'-'``.
    """
    file_path = data_dir + season + '_schedule.csv'

    cached = file_check(file_path)
    if cached and not override_file:
        return pd.read_csv(file_path)

    # The pages are keyed by the season's ending year ('2017-18' -> 2018).
    # The year used to be hard-coded to 2018, so any other season silently
    # scraped the 2017-18 schedule into its own cache file.
    end_year = int(season.split('-')[0]) + 1
    base_url = 'https://www.basketball-reference.com/leagues/NBA_{}_games-{}.html'
    months = [
        'october', 'november', 'december', 'january', 'february', 'march',
        'april'
    ]

    headers = []
    schedule_data = []
    for month in months:
        r = requests.post(base_url.format(end_year, month))
        soup = BeautifulSoup(r.content)
        table_rows = soup.find_all('tr')
        if month == months[0]:
            # Column names come from the first row of the first month only.
            headers = [th.getText() for th in table_rows[0].find_all('th')]
        # Data rows mix <th> and <td> cells, so both are collected.
        schedule_data.extend(
            [td.getText() for td in row.find_all(['th', 'td'])]
            for row in table_rows[1:])

    schedule_df = pd.DataFrame(schedule_data, columns=headers)
    # fillna returns a new frame; the result was previously discarded, which
    # made the call a no-op.
    schedule_df = schedule_df.fillna(0)
    schedule_df.rename(columns={
        'Home/Neutral': 'Home',
        'Visitor/Neutral': 'Visitor'
    }, inplace=True)
    schedule_df.to_csv(file_path)
    return schedule_df