def advanced_season_totals_to_csv(season):
    """Export the advanced player season totals for one season to a CSV file.

    The file is written to the current working directory and is named
    ``<season>_<season + 1>_advanced_player_season_totals.csv``.

    :param season: integer year; passed to the scraper as ``season_end_year``
    """
    next_season = season + 1
    # Format the years into the path: gluing the integers onto the string
    # pieces with ``+`` raises TypeError.
    output_file_path = "./{}_{}_advanced_player_season_totals.csv".format(
        season, next_season
    )
    client.players_advanced_season_totals(
        season_end_year=season,
        output_type=OutputType.CSV,
        output_file_path=output_file_path,
    )
def assert_player_advanced_season_totals_csv(self):
    """Export the advanced season totals as CSV and compare them, line by
    line, with the expected output file.

    Reads ``self.year``, ``self.output_file_path``,
    ``self.include_combined_values`` and ``self.expected_output_file_path``.
    """
    export_options = dict(
        season_end_year=self.year,
        output_type=OutputType.CSV,
        output_file_path=self.output_file_path,
        include_combined_values=self.include_combined_values,
    )
    players_advanced_season_totals(**export_options)

    with open(self.output_file_path, "r", encoding="utf8") as output_file:
        with open(self.expected_output_file_path, "r", encoding="utf8") as expected_output_file:
            actual_lines = output_file.readlines()
            expected_lines = expected_output_file.readlines()

    self.assertEqual(actual_lines, expected_lines)
def assert_player_advanced_season_totals_json(self):
    """Export the advanced season totals as JSON and compare the parsed
    document with the parsed expected output file.

    Reads ``self.year``, ``self.output_file_path``,
    ``self.include_combined_values`` and ``self.expected_output_file_path``.
    """
    export_options = dict(
        season_end_year=self.year,
        output_type=OutputType.JSON,
        output_file_path=self.output_file_path,
        include_combined_values=self.include_combined_values,
    )
    players_advanced_season_totals(**export_options)

    with open(self.output_file_path, "r", encoding="utf8") as output_file:
        with open(self.expected_output_file_path, "r", encoding="utf8") as expected_output_file:
            actual_document = json.load(output_file)
            expected_document = json.load(expected_output_file)

    self.assertEqual(actual_document, expected_document)
def test_last_2018_players_advanced_season_totals_row(self):
    """The row at index 604 of the 2018 advanced totals is Ivica Zubac's."""
    rows = players_advanced_season_totals(season_end_year=2018)
    expected_row = {
        "age": 20,
        "assist_percentage": 8.8,
        "block_percentage": 3.0,
        "box_plus_minus": -2.0,
        "defensive_box_plus_minus": -0.3,
        "defensive_rebound_percentage": 20.1,
        "defensive_win_shares": 0.5,
        "free_throw_attempt_rate": 0.418,
        "games_played": 43,
        "is_combined_totals": False,
        "minutes_played": 410,
        "name": "Ivica Zubac",
        "offensive_box_plus_minus": -1.8,
        "offensive_rebound_percentage": 11.8,
        "offensive_win_shares": 0.5,
        "player_efficiency_rating": 15.3,
        "positions": [Position.CENTER],
        "slug": "zubaciv01",
        "steal_percentage": 0.9,
        "team": Team.LOS_ANGELES_LAKERS,
        "three_point_attempt_rate": 0.008,
        "total_rebound_percentage": 16.0,
        "true_shooting_percentage": 0.557,
        "turnover_percentage": 15.3,
        "usage_percentage": 17.6,
        "value_over_replacement_player": 0.0,
        "win_shares": 1.0,
        "win_shares_per_48_minutes": 0.118,
    }
    self.assertEqual(rows[604], expected_row)
def player_stats():
    """Export the 2020 regular and advanced player season totals to CSV.

    Each export is announced on stdout before it runs; the destination
    paths are fixed.
    """
    exports = (
        (
            "collecting player season totals",
            client.players_season_totals,
            "C:\\Users\\NWHAL\\Documents\\nba_project\\2020_player_season_totals.csv",
        ),
        (
            "collecting advanced player stats",
            client.players_advanced_season_totals,
            "C:\\Users\\NWHAL\\Documents\\nba_project\\2020_advanced_player_season_totals.csv",
        ),
    )
    for announcement, export, destination in exports:
        print(announcement)
        export(
            season_end_year=2020,
            output_type=OutputType.CSV,
            output_file_path=destination,
        )
def get_rookie_advanced_stats(year, rookie_names):
    """Return the advanced season totals of the named rookies, keyed by name.

    Only rows with at least 30 games played are kept. When several rows
    share a name, the last such row wins.

    :param year: season end year passed to the scraper
    :param rookie_names: container of player names to keep
    :return: dict mapping player name to that player's totals row
    """
    return {
        row['name']: row
        for row in client.players_advanced_season_totals(season_end_year=year)
        if row['name'] in rookie_names and row['games_played'] >= 30
    }
def get_roster_player_list(team):
    """Return the 2020 advanced and regular season-total rows for one team.

    Both tables are first exported to CSV under ``./csv/`` and read back
    with pandas; each table is then filtered on its own ``team`` column.

    :param team: object whose ``name[1].upper()`` is the text searched for
        in the ``team`` column
    :return: tuple ``(advanced_rows, season_total_rows)`` of DataFrames
    """
    advanced_csv_path = "./csv/advanced_players.csv"
    totals_csv_path = "./csv/season_totals.csv"

    client.players_advanced_season_totals(
        season_end_year=2020,
        output_type=OutputType.CSV,
        output_file_path=advanced_csv_path)
    client.players_season_totals(
        season_end_year=2020,
        output_type=OutputType.CSV,
        output_file_path=totals_csv_path)

    df_advanced_stats = pd.read_csv(advanced_csv_path)
    df_total_season_stats = pd.read_csv(totals_csv_path)

    # NOTE(review): name[1] is the second element of team.name; what that
    # element is depends on the type of team.name - confirm with the caller.
    team_pattern = team.name[1].upper()

    players_advanced_roster_stats = df_advanced_stats.loc[
        df_advanced_stats['team'].str.contains(team_pattern)]
    # Filter the totals table with its OWN team column. Masking it with the
    # advanced table's column only works while both files happen to hold
    # the same rows in the same order.
    players_total_roster_stats = df_total_season_stats.loc[
        df_total_season_stats['team'].str.contains(team_pattern)]

    return players_advanced_roster_stats, players_total_roster_stats
def plot_usage_vs_points():
    """Scatter-plot usage percentage against points per game for 2020.

    Usage comes from the advanced season totals; points per game is
    ``points / games_played`` from the regular season totals, rounded to two
    decimals. The two tables are combined by row position.
    """
    # Function-scope import: this block only had ``DataFrame`` in scope.
    from pandas import concat

    # Build every per-player frame first and concatenate once.
    # ``DataFrame.append`` was removed in pandas 2.0 and copied the whole
    # frame on every iteration.
    advanced_frames = [
        DataFrame(x)
        for x in client.players_advanced_season_totals(season_end_year=2020)
    ]
    totals_frames = [
        DataFrame(x)
        for x in client.players_season_totals(season_end_year=2020)
    ]
    # ``concat`` rejects an empty list, so fall back to an empty frame.
    df = concat(advanced_frames, ignore_index=True) if advanced_frames else DataFrame()
    df2 = concat(totals_frames, ignore_index=True) if totals_frames else DataFrame()

    # NOTE(review): assumes both tables list the same players in the same
    # order, since the columns are aligned by index - confirm.
    df['avg_points'] = round(df2['points'] / df2['games_played'], 2)
    df.plot(x='usage_percentage', y='avg_points', kind='scatter')
    plt.show()
def test_players_advanced_season_totals_json(self):
    """The 2018 advanced totals rendered as JSON match the expected file."""
    expected_output_file_path = os.path.join(
        os.path.dirname(__file__),
        "../output/expected/player_advanced_season_totals_2018.json",
    )
    result = players_advanced_season_totals(season_end_year=2018, output_type=OutputType.JSON)
    # Read the fixture as UTF-8 explicitly, as the other file-comparison
    # helpers do, so the result does not depend on the platform default.
    with open(expected_output_file_path, "r", encoding="utf8") as expected_output:
        self.assertEqual(
            json.loads(result),
            json.load(expected_output),
        )
def get_season_advanced():
    """
    Scrapes advanced stats for every season end year from 1950 through 2019.

    :return: DataFrame with one row per scraped player row, plus a ``year``
        column holding the season end year
    """
    records = []
    for year in range(1950, 2020):
        for player in client.players_advanced_season_totals(season_end_year=year):
            # Copy the row with the year attached rather than mutating it.
            records.append(dict(player, year=year))
    # Build the frame once. Appending row by row relied on
    # ``DataFrame.append`` (removed in pandas 2.0) and re-copied the frame
    # for every player.
    return pd.DataFrame(records)
def create_player_totals_csv(year_start, year_end, advanced=False):
    """Export one CSV of player season totals per year and print progress.

    Files are written as ``data/adv_total_stats/stats_<year>.csv`` when
    ``advanced`` is true, otherwise ``data/total_stats/stats_<year>.csv``.

    :param year_start: first season end year to export (inclusive)
    :param year_end: last season end year to export (inclusive)
    :param advanced: export advanced totals instead of regular totals
    """
    if advanced:
        output_dir = "data/adv_total_stats"
        export = client.players_advanced_season_totals
    else:
        output_dir = "data/total_stats"
        export = client.players_season_totals

    years = range(year_start, year_end + 1)
    if years:
        # Create the directory once, and only when something will be written.
        os.makedirs(output_dir, exist_ok=True)

    for completed, year in enumerate(years, start=1):
        # Write into the directory created above. Previously the directory
        # was created under data/ but the file path omitted that prefix.
        export(
            season_end_year=year,
            output_type=OutputType.CSV,
            output_file_path="{}/stats_{}.csv".format(output_dir, year))
        print(
            "Exported stats for the year {}. {} percent completed".format(
                year, 100 * (completed / len(years))))
def get_df_advanced_player_stats(year):
    """
    Aggregates the advanced stats of each player for the given year.

    Rows sharing the same (slug, name, age, position) are combined into one
    row by weighting every advanced stat with that row's share of the
    group's total minutes played.

    Side effects: prints the merged frame's columns and pickles the
    per-row frame (not the aggregate) to ``c.PICKLE_PATH_ADV_STATS``.

    :param year: Integer Ex.) 2021
    :return: DataFrame with the grouping columns and one column per
        advanced stat
    """
    adv = client.players_advanced_season_totals(season_end_year=year)
    df_advanced = pd.json_normalize(adv)
    # Derive "position" from the stringified "positions" value: take the text
    # between the first and second ":" ...
    df_advanced["position"] = df_advanced["positions"].astype(str).str.split(":").str[1]
    # ... then trim leading spaces/quotes and trailing ">", "]" and quotes.
    # NOTE(review): presumably the value prints like
    # "[<Position.X: 'X'>]" with a single entry - confirm for multi-position rows.
    df_advanced["position"] = df_advanced["position"].map(lambda x: x.lstrip(" '").rstrip(">]'"))
    cols_groupby = ["slug", "name", "age", "position"]
    cols_adv = ['player_efficiency_rating', 'true_shooting_percentage', 'three_point_attempt_rate',
                'free_throw_attempt_rate', 'offensive_rebound_percentage', 'defensive_rebound_percentage',
                'total_rebound_percentage', 'assist_percentage', 'steal_percentage', 'block_percentage',
                'turnover_percentage', 'usage_percentage', 'offensive_win_shares', 'defensive_win_shares',
                'win_shares', 'win_shares_per_48_minutes', 'offensive_box_plus_minus',
                'defensive_box_plus_minus', 'box_plus_minus', 'value_over_replacement_player']
    cols_adv_contrib = [col + "_contrib" for col in cols_adv]
    # Total minutes per group, merged back onto every row of that group.
    df_total_minutes = df_advanced.groupby(cols_groupby)["minutes_played"].sum().reset_index()
    # The misspelled column name ends up in the pickled frame below.
    df_total_minutes = df_total_minutes.rename(columns = {"minutes_played": "minutues_played_total"})
    df_stats = pd.merge(df_advanced, df_total_minutes, on=cols_groupby)
    # Weight = this row's share of the group's minutes.
    df_stats["weight"] = df_stats["minutes_played"] / df_stats["minutues_played_total"]
    print(df_stats.columns)
    # Get contribs: each stat scaled by the row's weight
    for col in cols_adv:
        df_stats[col+"_contrib"] = df_stats[col] * df_stats["weight"]
    # Aggregate contributions to get final df
    df_stats_agg = df_stats.groupby(cols_groupby)[cols_adv_contrib].sum().reset_index()
    # Remove the "_contrib" suffix from all contrib columns
    cols_df_stats_agg = [col.replace("_contrib", "") for col in df_stats_agg.columns]
    df_stats_agg.columns = cols_df_stats_agg
    # Persist the per-row frame (with weights and contribs), not the aggregate.
    df_stats.to_pickle(c.PICKLE_PATH_ADV_STATS)
    return df_stats_agg
def clean_advanced(year):
    """Collapse the advanced season totals for ``year`` to one row per name.

    A player traded mid-season appears once per team. Every selected
    numeric column is scaled by the row's share of the player's total
    minutes and then summed per name; ``age`` is averaged per name.

    :param year: season end year passed to the scraper
    :return: DataFrame indexed by ``name`` with the weighted sums, ``age``
        and a ``year`` column
    """
    df = pd.DataFrame(
        client.players_advanced_season_totals(season_end_year=year))

    # Share of each player's minutes that belongs to this row.
    minutes_by_name = df.groupby('name')['minutes_played']
    df['minutes_played_total'] = minutes_by_name.transform('sum')
    df['proportion'] = df['minutes_played'] / df['minutes_played_total']

    # The selection runs after the two helper columns were added, so they
    # are part of it whenever their dtypes match.
    num_cols = list(df.select_dtypes(include=['int', 'float64']))
    for excluded in ('age', 'minutes_played', 'games_played'):
        num_cols.remove(excluded)

    for weighted_col in num_cols:
        df[weighted_col] = df[weighted_col] * df['proportion']

    per_name = df.groupby('name')
    df_grouped = per_name[num_cols].sum()
    df_grouped['age'] = per_name['age'].mean()
    df_grouped['year'] = year
    return df_grouped
def test_first_2018_players_advanced_season_totals_row(self):
    """The first row of the 2018 advanced totals is Alex Abrines's."""
    rows = players_advanced_season_totals(season_end_year=2018)
    expected_row = {
        "age": 24,
        "assist_percentage": 3.4,
        "block_percentage": 0.6,
        "box_plus_minus": -1.5,
        "defensive_box_plus_minus": 0.4,
        "defensive_rebound_percentage": 8.9,
        "defensive_win_shares": 1.0,
        "free_throw_attempt_rate": 0.158,
        "games_played": 75,
        "is_combined_totals": False,
        "minutes_played": 1134,
        "name": "\u00c1lex Abrines",
        "offensive_box_plus_minus": -1.9,
        "offensive_rebound_percentage": 2.5,
        "offensive_win_shares": 1.3,
        "player_efficiency_rating": 9.0,
        "positions": [Position.SHOOTING_GUARD],
        "slug": "abrinal01",
        "steal_percentage": 1.7,
        "team": Team.OKLAHOMA_CITY_THUNDER,
        "three_point_attempt_rate": 0.759,
        "total_rebound_percentage": 5.6,
        "true_shooting_percentage": 0.567,
        "turnover_percentage": 7.4,
        "usage_percentage": 12.7,
        "value_over_replacement_player": 0.1,
        "win_shares": 2.2,
        "win_shares_per_48_minutes": 0.094,
    }
    self.assertEqual(rows[0], expected_row)
def test_players_advanced_season_totals_csv_append(self):
    """Run the 2018 CSV export with the APPEND write option.

    There are no assertions; the test passes when the call does not raise.
    """
    csv_path = "./player_advanced_season_totals_2019.csv"
    players_advanced_season_totals(
        season_end_year=2018,
        output_type=OutputType.CSV,
        output_file_path=csv_path,
        output_write_option=OutputWriteOption.APPEND,
    )
def test_players_advanced_season_totals_csv(self):
    """Run the 2018 CSV export with the default write option.

    There are no assertions; the test passes when the call does not raise.
    """
    csv_path = "./player_advanced_season_totals_2019.csv"
    players_advanced_season_totals(
        season_end_year=2018,
        output_type=OutputType.CSV,
        output_file_path=csv_path,
    )
def test_players_advanced_season_totals_json(self):
    """Requesting the 2018 advanced totals as JSON yields a non-None result."""
    self.assertIsNotNone(
        players_advanced_season_totals(
            season_end_year=2018,
            output_type=OutputType.JSON,
        )
    )
def test_players_advanced_season_totals(self):
    """Requesting the 2018 advanced totals yields a non-None result."""
    self.assertIsNotNone(
        players_advanced_season_totals(season_end_year=2018)
    )
# -*- coding: utf-8 -*-
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType
import pandas as pd

# Load the season labels from the 'seasons' column of the spreadsheet.
seasons_df = pd.read_excel(io='years_nba.xlsx')
seasons_list = [row['seasons'] for _, row in seasons_df.iterrows()]

# Keep only the first four characters of every label.
seasons = [label[:4] for label in seasons_list]

# Export one advanced-stats CSV per season into the working directory.
for nba_season in seasons:
    file_path_thistime = "./{}_advanced_stats.csv".format(nba_season)
    client.players_advanced_season_totals(
        season_end_year=nba_season,
        output_type=OutputType.CSV,
        output_file_path=file_path_thistime,
    )
def _prompt_for_date():
    """Ask for a date; return (raw_text, day, month, year) or None if malformed."""
    raw_date = input("\nEnter a date (use this format 1-1-2018): ")
    try:
        # A wrong number of parts and a non-numeric part both raise ValueError.
        day, month, year = (int(part) for part in raw_date.split("-"))
    except ValueError:
        print("Invalid date!!\n\n")
        return None
    return raw_date, day, month, year


def _prompt_for_season_end_year():
    """Ask for a season end year; return (raw_text, year) or None if not a number."""
    raw_year = input("\nEnter season end year: ")
    try:
        return raw_year, int(raw_year)
    except ValueError:
        print("Invalid season end year!!\n\n")
        return None


def _export_report(export, file_name, **query):
    """Run one CSV export into exported_files/ and print where it was written."""
    import os  # function-scope import: only ``abspath`` is known to be in scope

    output_path = "exported_files/" + file_name
    print("Exporting report please wait..........")
    # The export fails if the target directory is missing.
    os.makedirs("exported_files", exist_ok=True)
    export(output_type=OutputType.CSV, output_file_path=output_path, **query)
    print("Report exported at: " + abspath(output_path) + "!!\n\n")


def main():
    """Interactive menu that exports reports as CSV until the user exits.

    Options 1 and 4 ask for a day-month-year date, options 2, 3 and 5 ask
    for a season end year, and option 6 leaves the loop. Malformed input
    prints a message and returns to the menu instead of raising.
    """
    print("\n#######################################################################")
    print("#######################################################################")
    print("######################### NBA Report exporter #########################")
    print("#######################################################################")
    print("#######################################################################\n")

    # One menu line per option, joined without the stray whitespace that
    # backslash line continuations inside a literal would embed.
    menu = "\n".join((
        "1. Players box scores by a date",
        "2. Players season statistics for a season",
        "3. Players advanced season statistics for a season",
        "4. All Team box scores by a date",
        "5. Schedule for a season",
        "6. Exit",
    ))
    # option -> (export function, file name prefix)
    date_reports = {
        "1": (client.player_box_scores, "all-player-box-report-"),
        "4": (client.team_box_scores, "all-team-report-"),
    }
    season_reports = {
        "2": (client.players_season_totals, "all-player-season-report-"),
        "3": (client.players_advanced_season_totals, "all-player-advanced-season-report-"),
        "5": (client.season_schedule, "season-schedule-"),
    }

    while True:
        print(menu)
        reportObject = input("\nPlease select a option: ")

        if reportObject in date_reports:
            answer = _prompt_for_date()
            if answer is None:
                continue
            inputDate, day, month, year = answer
            export, prefix = date_reports[reportObject]
            _export_report(export, prefix + inputDate + ".csv",
                           day=day, month=month, year=year)
        elif reportObject in season_reports:
            answer = _prompt_for_season_end_year()
            if answer is None:
                continue
            endYear, season_end_year = answer
            export, prefix = season_reports[reportObject]
            _export_report(export, prefix + endYear + ".csv",
                           season_end_year=season_end_year)
        elif reportObject == "6":
            print("\n#######################################################################")
            print("################################# Bye #################################")
            print("#######################################################################\n")
            break
        else:
            print("Invalid option!!\n\n")
def test_2019_player_advanced_season_totals(self):
    """The 2019 advanced season totals are returned and are not empty."""
    advanced_totals = client.players_advanced_season_totals(
        season_end_year=2019,
    )
    self.assertIsNotNone(advanced_totals)
    row_count = len(advanced_totals)
    self.assertTrue(row_count > 0)
def get_advanced_season_totals(year):
    """Return the scraper's advanced player season totals for ``year``.

    :param year: season end year passed to the scraper
    """
    advanced_totals = client.players_advanced_season_totals(
        season_end_year=year,
    )
    return advanced_totals
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType, Team

# Print the schedule for the season ending in 2021.
print(client.season_schedule(season_end_year=2021))

# Export the advanced player season totals for the season ending in 2018.
# NOTE(review): the file name does not say "advanced" although the content
# is the advanced table - rename if regular totals are exported alongside.
client.players_advanced_season_totals(
    season_end_year=2018,
    output_type=OutputType.CSV,
    output_file_path="./2017_2018_player_season_totals.csv"
)

# Export the play-by-play of the Celtics home game on 2018-10-16. The file
# name now carries the requested date (it previously said 10_06).
client.play_by_play(
    home_team=Team.BOSTON_CELTICS,
    year=2018,
    month=10,
    day=16,
    output_type=OutputType.CSV,
    output_file_path="./2018_10_16_BOS_PBP.csv"
)

# Export the regular season box scores of player "westbru01" for the season
# ending in 2018.
client.regular_season_player_box_scores(
    player_identifier="westbru01",
    season_end_year=2018,
    output_type=OutputType.CSV,
    output_file_path="./2017_2018_russell_westbrook_regular_season_box_scores.csv"
)
def create_base_df(season_year):
    """Build one row per player with season totals, salary and points per game.

    Season totals are scraped for ``season_year``, position and team values
    are turned into short text, shooting percentages and rebounds are
    derived, rows are aggregated per player and joined with the matching
    salary column of ``nba_beta_salary.csv``. Players without a salary entry
    are dropped.

    :param season_year: season end year, e.g. 2020 for the 2019-20 season
    :return: DataFrame of aggregated stats with the salary column and ``ppg``
    :raises KeyError: if the salary file has no column for the season
    """
    from basketball_reference_web_scraper import client
    import pandas as pd
    import unicodedata

    # Honour the requested season; the year used to be fixed at 2020.
    season_year = int(season_year)
    season_stats = client.players_season_totals(season_end_year=season_year)

    total_df = pd.DataFrame(season_stats)
    total_df['positions'] = total_df['positions'].astype(str)
    total_df['team'] = total_df['team'].astype(str)

    # Stringified position lists -> short position codes.
    positions = {
        "[<Position.CENTER: 'CENTER'>]": "C",
        "[<Position.SHOOTING_GUARD: 'SHOOTING GUARD'>]": "SG",
        "[<Position.POWER_FORWARD: 'POWER FORWARD'>]": "PF",
        "[<Position.SMALL_FORWARD: 'SMALL FORWARD'>]": "SF",
        "[<Position.POINT_GUARD: 'POINT GUARD'>]": "PG",
        "[<Position.GUARD: 'GUARD'>]": "SG"
    }
    # Applied as regular expressions to the whole frame.
    team_sub = {"Team.": "", "_": ' '}
    total_df = total_df.replace(positions).replace(team_sub, regex=True)

    total_df = total_df.assign(
        field_goal_percentage=(total_df['made_field_goals'] * 100 /
                               total_df['attempted_field_goals']).round(1),
        three_point_field_goal_percentage=(
            total_df['made_three_point_field_goals'] * 100 /
            total_df['attempted_three_point_field_goals']).round(1),
        free_throw_percentage=(total_df['made_free_throws'] * 100 /
                               total_df['attempted_free_throws']).round(1),
        rebounds=total_df['offensive_rebounds'] +
        total_df['defensive_rebounds']).fillna(0)

    # ASCII-only name without dots.
    total_df['no_accents'] = total_df['name'].apply(
        lambda x: unicodedata.normalize('NFD', x).encode(
            'ascii', 'ignore').decode('UTF-8').replace(".", ""))
    # Single .loc assignment: the chained form may write to a temporary copy.
    total_df.loc[total_df['no_accents'] == 'Taurean Waller-Prince',
                 'no_accents'] = 'Taurean Prince'

    # One row per player: percentages averaged, counting stats summed.
    total_df = total_df.groupby(['name', 'slug', 'no_accents'],
                                as_index=False).agg({
                                    'field_goal_percentage': 'mean',
                                    'free_throw_percentage': 'mean',
                                    'made_three_point_field_goals': 'sum',
                                    'made_field_goals': 'sum',
                                    'made_free_throws': 'sum',
                                    'games_played': 'sum',
                                    'attempted_field_goals': 'sum',
                                    'attempted_free_throws': 'sum',
                                    'rebounds': 'sum',
                                    'assists': 'sum',
                                    'blocks': 'sum',
                                    'steals': 'sum',
                                    'turnovers': 'sum',
                                    'team': 'last'
                                }).drop_duplicates()

    salaries = pd.read_csv("nba_beta_salary.csv", sep=",", engine='python')
    # Salary column for the season, e.g. 2020 -> "2019-20".
    # NOTE(review): assumes every season column follows the "2019-20"
    # pattern of the one column this code used before - confirm.
    salary_column = "{}-{:02d}".format(season_year - 1, season_year % 100)
    total_df_with_salaries = total_df.join(
        salaries[['slug', salary_column]].set_index('slug'),
        on='slug').dropna()
    total_df_with_salaries = total_df_with_salaries.drop('slug', axis=1)

    # Points = 2 per two-point field goal + 3 per three-pointer + 1 per free throw.
    made_threes = total_df_with_salaries['made_three_point_field_goals']
    made_twos = total_df_with_salaries['made_field_goals'] - made_threes
    total_df_with_salaries['ppg'] = (
        2 * made_twos + 3 * made_threes +
        total_df_with_salaries['made_free_throws']
    ) / total_df_with_salaries['games_played']
    return total_df_with_salaries
def get_advanced_totals(self, season_year):
    """Return the scraper's advanced player season totals for ``season_year``.

    :param season_year: passed to the scraper as its first positional argument
    """
    return client.players_advanced_season_totals(season_year)
def test_2018_players_advanced_season_totals_length(self):
    """The 2018 advanced season totals contain exactly 605 rows."""
    rows = players_advanced_season_totals(season_end_year=2018)
    row_count = len(rows)
    self.assertEqual(row_count, 605)
def test_2001_players_advanced_season_totals_csv(self):
    """Run the 2001 CSV export with the WRITE option.

    There are no assertions; the test passes when the call does not raise.
    """
    csv_path = "./player_advanced_season_totals_2001.csv"
    players_advanced_season_totals(
        season_end_year=2001,
        output_type=OutputType.CSV,
        output_file_path=csv_path,
        output_write_option=OutputWriteOption.WRITE,
    )
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType
import pandas as pd

# Get 2019-2020 advanced and regular season statistics for all players
filename = "data/player_advanced_stats_2020.csv"
regular_filename = "data/player_stats_2020.csv"
combined_filename = "data/player_combined_data_2020.csv"

client.players_advanced_season_totals(season_end_year=2020,
                                      output_type=OutputType.CSV,
                                      output_file_path=filename)
client.players_season_totals(season_end_year=2020,
                             output_type=OutputType.CSV,
                             output_file_path=regular_filename)

# Put each advanced row and the regular row at the same position on one
# line. All three files are managed by ``with`` so the combined file is
# flushed and closed; the encoding is explicit so the result does not
# depend on the platform default.
# NOTE(review): utf8 is assumed to be the encoding the scraper writes - confirm.
with open(filename, encoding="utf8") as advanced:
    with open(regular_filename, encoding="utf8") as regular:
        with open(combined_filename, 'w+', encoding="utf8") as newdata:
            for line, regline in zip(advanced, regular):
                # A comma keeps the last advanced field and the first regular
                # field in separate CSV columns.
                newdata.write(line.rstrip('\n') + ',' + regline)
# season_end_year can only be 2000 - 2020
# Visit https://github.com/jaebradley/basketball_reference_web_scraper for more info
import json
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import OutputType

season_end_year = 2020

total_season_stats = client.players_season_totals(season_end_year)
advanced_stats = client.players_advanced_season_totals(season_end_year)

# Pair the rows of both tables by position and nest each advanced row under
# its totals row.
for totals_row, advanced_row in zip(total_season_stats, advanced_stats):
    # Replace the position list and team by plain values: the first
    # position's ``value`` and the team's ``value``.
    first_position = totals_row["positions"][0]
    totals_row["positions"] = first_position.value
    totals_row["team"] = totals_row["team"].value

    # The nested advanced row does not repeat positions and team.
    advanced_row.pop('positions')
    advanced_row.pop('team')
    totals_row["advanced_stats"] = advanced_row

season_stats = json.dumps(total_season_stats)

output_path = './{}_season_totals.json'.format(season_end_year)
with open(output_path, 'w') as outfile:
    json.dump(total_season_stats, outfile)