def runResults():
    """Forecast fantasy points for rostered 2015 players and write them to CSV.

    Side effects:
        * writes the raw player list to ``playerList.csv``
        * writes the ``PLAYER_ID`` / ``ForecastFPTS`` table to ``result2.csv``

    A forecast is requested only for players with at least 20 games played;
    every other player (and every player whose forecast raises) is recorded
    with a forecast of 0.
    """
    players_current = pd.DataFrame(goldsberry.PlayerList(2015))
    players_current.to_csv('playerList.csv')

    # One row per team. `keep='last'` is the supported spelling of the removed
    # `take_last=True`, and the column-scoped replace avoids the chained
    # in-place edit on a selection, which is not guaranteed to reach the frame.
    team_list = (
        players_current[['TEAM_CODE', 'TEAM_ID']]
        .drop_duplicates(keep='last')
        .replace({'TEAM_CODE': {'': np.nan}})
        .dropna(subset=['TEAM_CODE'])
        .reset_index(drop=True)
    )

    # The placeholder first row is kept so the output file layout is unchanged.
    results = [['test', 0]]
    for team_id in team_list['TEAM_ID']:
        roster = goldsberry.team.roster(team_id, season='2015')
        roster = pd.DataFrame(roster.players())
        if roster.empty:
            # An empty roster frame has no PLAYER_ID column to iterate over.
            continue
        for player_id in roster['PLAYER_ID']:
            player = Player(player_id, '2015')
            forecast_value = 0
            try:
                if player.player_gamePlayed >= 20:
                    forecast_value = GetOnePlayerForecasts(player_id)[0]
            except Exception:
                # Deliberate best effort: one failing forecast must not abort
                # the whole run, so the player keeps the default of 0.
                forecast_value = 0
            # NOTE(review): this records a row for every player, including
            # those under 20 games — confirm that such players should not be
            # omitted from the output instead.
            results.append([player_id, forecast_value])

    results = pd.DataFrame(results, columns=['PLAYER_ID', 'ForecastFPTS'])
    results.to_csv('result2.csv')
def GetAllPlayersLogData(season='2015'):
    """Collect the game logs of every rostered player for one season.

    Args:
        season: Season identifier forwarded unchanged to the goldsberry
            player list, team roster and game-log calls.

    Returns:
        A single DataFrame holding every non-empty player game log, each one
        passed through ``AddFantacyPointToPlayerLog``. An empty DataFrame is
        returned when no logs were found.
    """
    players_current = pd.DataFrame(goldsberry.PlayerList(season))

    # One row per team. `keep='last'` is the supported spelling of the removed
    # `take_last=True`, and the column-scoped replace avoids the chained
    # in-place edit on a selection, which is not guaranteed to reach the frame.
    team_list = (
        players_current[['TEAM_CODE', 'TEAM_ID']]
        .drop_duplicates(keep='last')
        .replace({'TEAM_CODE': {'': np.nan}})
        .dropna(subset=['TEAM_CODE'])
        .reset_index(drop=True)
    )

    # Collect the frames and concatenate once: DataFrame.append copied the
    # accumulated data on every call and no longer exists in pandas 2.x.
    player_logs = []
    for team_id in team_list['TEAM_ID']:
        roster = goldsberry.team.roster(team_id, season)
        roster = pd.DataFrame(roster.players())
        if roster.empty:
            # An empty roster frame has no PLAYER_ID column to iterate over.
            continue
        for player_id in roster['PLAYER_ID']:
            game_log = goldsberry.player.game_logs(player_id, season)
            game_log = pd.DataFrame(game_log.logs())
            if len(game_log) >= 1:
                player_logs.append(AddFantacyPointToPlayerLog(game_log))

    if not player_logs:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    return pd.concat(player_logs)
# Punctuation stripped from player names; hyphens and apostrophes are kept.
_NAME_PUNCTUATION = frozenset(
    string.punctuation.replace('-', '').replace("'", ''))


def _clean_name(raw_name):
    """Normalize a player name so both data sources can be matched on it.

    Keeps only the text before the first backslash, removes punctuation
    (except ``-`` and ``'``), drops the ``Jr`` / ``III`` suffixes and strips
    surrounding whitespace. Works on Python 2 and 3 strings alike, unlike
    ``str.translate(None, chars)`` which exists only on Python 2 byte strings.
    """
    base = raw_name.split('\\', 1)[0]
    base = ''.join(ch for ch in base if ch not in _NAME_PUNCTUATION)
    return base.replace(' Jr', '').replace('Jr.', '').replace(
        'III', '').strip()


bbref = pd.read_csv('../data/bbref_advanced.csv')

# The format of this table, taken directly from basketball-reference.com, is
# a little annoying: the 'Player' column features a player's name, followed by
# the abbreviation that the site uses as a URL endpoint. We don't need this
# bit, so this line strips it.
bbref.Player = bbref.Player.apply(_clean_name)

# There are a few exceptions to the naming conventions on NBA.com. bbref is
# very good about standardizing this, NBA.com is not; these lines fix those
# exceptions. `.loc` writes into the frame itself instead of relying on
# chained assignment through `bbref.Player[...]`.
bbref.loc[99, 'Player'] = 'Nene'
bbref.loc[268, 'Player'] = 'Taurean Prince'
# bbref.Player[118] = 'T.J. McConnell'

players2016 = goldsberry.PlayerList(Season='2016-17')
players2016 = pd.DataFrame(players2016.players())
players2016.DISPLAY_FIRST_LAST = players2016.DISPLAY_FIRST_LAST.apply(
    lambda x: _clean_name(str(x)))

# Cleaned display name -> PERSON_ID for every player returned by NBA.com.
player_dict = pd.Series(players2016.PERSON_ID.values,
                        index=players2016.DISPLAY_FIRST_LAST)

# Now we see the reason for reading in the basketball-reference table: that
# table was built using the basketball-reference season finder, with
# parameters GP >= 50 and MPG >= 10. Keeping only the names present in bbref
# drops every player that doesn't meet those criteria. A bbref name missing
# from player_dict raises KeyError, which surfaces naming mismatches that
# still need a manual fix above.
players_dict = {}
for player in bbref.Player.values:
    players_dict[player] = player_dict[player]
def Player_team_id(self):
    """Return the TEAM_ID entries for this player in the ``self.year`` list.

    The league-wide player list for ``self.year`` is fetched and filtered to
    the rows whose ``PERSON_ID`` equals ``self.id``. The result is the
    ``TEAM_ID`` column of that selection, not a bare scalar.
    """
    league_players = pd.DataFrame(goldsberry.PlayerList(self.year))
    is_this_player = league_players['PERSON_ID'] == self.id
    return league_players[is_this_player].TEAM_ID
import numpy as np

# [team name, NBA team id] pairs for the two groups of teams.
westTeams = [
    ['Golden State Warriors', 1610612744],
    ['San Antonio Spurs', 1610612759],
    ['Oklahoma City Thunder', 1610612760],
    ['Los Angeles Clippers', 1610612746],
    ['Memphis Grizzlies', 1610612763],
    ['Portland Trailblazers', 1610612757],
    ['Dallas Mavericks', 1610612742],
    ['Houston Rockets', 1610612745],
]
eastTeams = [
    ['Cleveland Cavaliers', 1610612739],
    ['Toronto Raptors', 1610612761],
    ['Atlanta Hawks', 1610612737],
    ['Boston Celtics', 1610612738],
    ['Miami Heat', 1610612748],
    ['Charlotte Hornets', 1610612766],
    ['Detroit Pistons', 1610612765],
    ['Indiana Pacers', 1610612754],
]

gameids = gb.GameIDs()
players = gb.PlayerList()

# Full game list, also saved to disk.
gameids2015 = pd.DataFrame(gameids.game_list())
gameids2015.to_csv('AllTeams.csv')
players2015 = pd.DataFrame(players.players())

# Only the first four western teams are processed; each pass overwrites the
# team1West* variables, so they hold the fourth team once the loop ends.
for i in range(4):
    team1WestName, team1WestID = westTeams[i]
    team1WestPlayers = players2015.loc[players2015['TEAM_ID'] == team1WestID]
    team1WestLog = gameids2015.loc[gameids2015['TEAM_ID'] == team1WestID]
    team1WestPlayerID = team1WestPlayers['PERSON_ID']

    # One game-log DataFrame per player on the team.
    team1WestPlayerGameLog = []
    for personID in team1WestPlayerID:
        team1WestPlayerGameLog.append(
            pd.DataFrame(gb.player.game_logs(personID).logs())
        )
def get_players():
    """Return the goldsberry player list as a name / id table.

    Returns:
        A DataFrame with the columns ``PLAYER_NAME`` (renamed from
        ``DISPLAY_FIRST_LAST``) and ``PERSON_ID``.
    """
    players_json = goldsberry.PlayerList()
    players_df = pd.DataFrame(players_json.players())
    # The same frame must be used throughout; the original referred to it by
    # two other, undefined names and raised NameError.
    players_df.rename(columns={'DISPLAY_FIRST_LAST': 'PLAYER_NAME'},
                      inplace=True)
    return players_df[['PLAYER_NAME', 'PERSON_ID']]
def pre_post_ASB_2017_18(playercode):
    """Compare a player's 2017-18 GameScore before and after the All-Star break.

    Fetches the player's game logs, adds a ``GmSc`` column (via ``gamescore``)
    and a ``TSP`` column (via ``TSPercent``), prints one sample game log and
    the mean GameScore of each period, and saves side-by-side GameScore
    histograms to ``figure.png``.

    Args:
        playercode: Value matched against the ``PLAYERCODE`` column of the
            2017-18 player list.

    Returns:
        None. Results are printed and written to ``figure.png``.
    """
    players = goldsberry.PlayerList(Season='2017-18')
    players2017 = pd.DataFrame(players.players())

    games = goldsberry.GameIDs()
    games.get_new_data(Season='2017-18')
    games2017 = pd.DataFrame(games.game_list())

    # Selection of PERSON_ID for the requested player (passed on as selected,
    # not unwrapped to a scalar).
    player_id = players2017.loc[
        players2017['PLAYERCODE'] == playercode, 'PERSON_ID']

    game_logs = pd.DataFrame(goldsberry.player.game_logs(player_id).logs())
    game_logs['GmSc'] = game_logs.apply(gamescore, axis=1)
    game_logs['TSP'] = game_logs.apply(TSPercent, axis=1)

    # Label-based, inclusive slices: rows 0-57 versus rows 58-82.
    # NOTE(review): assumes the logs are in chronological order and that the
    # All-Star break falls after row 57 for this player — confirm.
    pre_all_star = game_logs.loc[0:57]
    post_all_star = game_logs.loc[58:82]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(getGameLog(game_logs.loc[5], playercode, players2017, games2017))

    sorted_after = post_all_star.sort_values(by=['TSP'])
    sorted_before = pre_all_star.sort_values(by=['TSP'])

    # Single-column (n, 1) arrays of GameScore values.
    after_gs = np.array(sorted_after[['GmSc']])
    before_gs = np.array(sorted_before[['GmSc']])

    print(after_gs.mean())
    print("\n")
    print(before_gs.mean())

    # Histograms of GameScore for both periods on a shared y axis.
    fig, axs = plt.subplots(1, 2, sharey=True, tight_layout=True)
    axs[0].hist(before_gs, bins=15)
    axs[0].set_title('GameScore Before All-Star Break')
    axs[1].hist(after_gs, bins=15)
    axs[1].set_title('GameScore After All-Star Break')
    plt.savefig('figure.png')
# Import Goldsberry, PANDAS
import goldsberry
import pandas as pd

# Player list for the 2010-11 season as a DataFrame. The class is spelled
# `DataFrame`; `pd.Dataframe` does not exist and raised AttributeError.
players2010 = goldsberry.PlayerList(Season='2010-11')
players2010 = pd.DataFrame(players2010.players())
players2010.head()
#!/usr/bin/env python
# Mohammad Saad
# 1/4/2016
# player_id.py
# Builds up a dictionary of players and their stats.nba.com IDs
# Exports to a file with a Python-loadable dictionary to be used elsewhere

import goldsberry
import pickle

player_list = goldsberry.PlayerList(AllTime=True)
player_dict = {}

# build up dictionary for every player
for entry in player_list:
    name = entry["PLAYERCODE"]
    if name is None:
        continue
    # Codes starting with 'H' lose their first 8 characters (presumably a
    # fixed-width prefix on historical players — confirm). startswith() also
    # copes with an empty code, where name[0] would raise IndexError.
    if name.startswith('H'):
        name = name[8:]
    player_dict[name] = int(entry["PERSON_ID"])

# The context manager flushes and closes the file even if pickling fails.
with open('player_dict.txt', 'wb') as out_file:
    pickle.dump(player_dict, out_file)
import goldsberry
import pandas as pd
import numpy as np
from scipy.stats import binned_statistic_2d
import seaborn as sns
from bokeh.plotting import figure
from math import pi

pd.set_option("display.max_columns", 50)
goldsberry.__version__

players_2015 = goldsberry.PlayerList()
players_2015 = pd.DataFrame(players_2015.players())

# Boolean-mask selection of the PERSON_ID rows whose name contains "Harden".
# `.loc` replaces `.ix`, which was removed in pandas 1.0; for a boolean mask
# the two select the same rows.
harden_id = players_2015['PERSON_ID'].loc[
    players_2015['DISPLAY_LAST_COMMA_FIRST'].str.contains("Harden")]

harden_shots = goldsberry.player.shot_chart(harden_id)
harden_shots = pd.DataFrame(harden_shots.chart())
harden_shots.head()

sns.set_style("white")
sns.set_color_codes()

# NOTE(review): `plt` is not imported anywhere in this snippet — presumably
# matplotlib.pyplot is provided by the surrounding session; confirm.
# Scatter plot of every shot location.
plt.figure(figsize=(12, 11))
plt.scatter(harden_shots.LOC_X, harden_shots.LOC_Y)
plt.show()

# Same plot restricted to shots tagged with the "Right Side(R)" zone area.
right = harden_shots[harden_shots.SHOT_ZONE_AREA == "Right Side(R)"]
plt.figure(figsize=(12, 11))
plt.scatter(right.LOC_X, right.LOC_Y)