def build_csv(overwrite):
    """Build, or load from cache, the player-to-player passing table.

    When ``../data/p2p_assists/passing.csv`` is missing or ``overwrite`` is
    truthy, the passing dashboard of every player returned by
    ``get_general_stats`` is fetched for the seasons starting in 2013, 2014
    and 2015. Each dashboard row becomes one output row; the result is
    sorted by ``Assists`` (descending), written to the CSV and returned.
    Otherwise the cached CSV is read back and returned.

    Args:
        overwrite: Rebuild the CSV even when a cached copy exists.

    Returns:
        pandas.DataFrame with the columns ``Year``, ``Receiver_Id``,
        ``Receiver_Name``, ``Passer_Id``, ``Passer_Name``, ``Frequency``,
        ``Pass`` and ``Assists``. ``to_csv`` is called with its default
        ``index=True``, so a frame read back from the cache also carries
        the saved index as an extra leading column.
    """
    dir_path = '../data/p2p_assists'
    file_path = dir_path + '/passing.csv'
    columns = ['Year', 'Receiver_Id', 'Receiver_Name', 'Passer_Id',
               'Passer_Name', 'Frequency', 'Pass', 'Assists']

    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        # Report the directory that was actually created.
        print('MAKING DIRECTORY : ' + dir_path)

    # Serve the cached copy unless a rebuild was requested.
    if os.path.isfile(file_path) and not overwrite:
        return pd.read_csv(file_path)

    # Collect plain rows and build the frame once: DataFrame.append copied
    # the whole frame on every call and no longer exists in pandas >= 2.0.
    rows = []
    for year in range(2013, 2016):
        season = get_year_string(year)
        base_df = get_general_stats(PlayerOrTeam.P, MeasureTypes.BASE,
                                    PerModes.TOTAL, season, SeasonTypes.REG)
        for _, receiver in base_df.iterrows():
            print(receiver.PLAYER_NAME)
            player_passing_df = get_player_passing_dashboard(
                receiver.PLAYER_ID, season)
            # The dashboard getter may return None; skip such players.
            if player_passing_df is None:
                continue
            for _, passer in player_passing_df.iterrows():
                rows.append([
                    year,
                    passer.PLAYER_ID,
                    passer.PLAYER_NAME_LAST_FIRST,
                    passer.PASS_TEAMMATE_PLAYER_ID,
                    passer.PASS_FROM,
                    passer.FREQUENCY,
                    passer.PASS,
                    passer.AST,
                ])

    passing_df = pd.DataFrame(rows, columns=columns)
    passing_df = passing_df.sort_values(by='Assists', ascending=False)
    passing_df.to_csv(file_path)
    return passing_df
def get_data(overwrite_file):
    """Build, or load from cache, team offensive/defensive pace data.

    When ``../data/offensive_v_defensive_pace/data.csv`` exists and
    ``overwrite_file`` is falsy, the cached CSV is read back and returned.
    Otherwise, for the seasons starting in 2013, 2014 and 2015, advanced
    team stats and SportsVU possession stats are fetched, one row per team
    is assembled, the derived columns ``TOTAL_POSSESSIONS``, ``OFF_PACE``,
    ``DEF_PACE`` and ``DISPLAY`` are added, and the result is written to
    the CSV and returned.

    Args:
        overwrite_file: Rebuild the CSV even when a cached copy exists.

    Returns:
        pandas.DataFrame with the columns ``GP``, ``YEAR``, ``TEAM_ID``,
        ``TEAM_NAME``, ``OFF_TOP``, ``DEF_TOP``, ``ORTG``, ``DRTG``,
        ``PACE`` plus the derived columns listed above. The index restarts
        at 0 for every season.

    Raises:
        IndexError: If a team from the advanced stats has no row in the
            SportsVU possession stats of the same season.
    """
    dir_path = '../data/offensive_v_defensive_pace'
    file_path = dir_path + '/data.csv'
    columns = ['GP', 'YEAR', 'TEAM_ID', 'TEAM_NAME', 'OFF_TOP', 'DEF_TOP',
               'ORTG', 'DRTG', 'PACE']

    if not os.path.exists(dir_path):
        print("MAKING DIRECTORY!")
        os.makedirs(dir_path)
    else:
        print("DIRECTORY EXISTS!")

    if os.path.isfile(file_path) and not overwrite_file:
        print("ALREADY HAVE OFFENSIVE AND DEFENSIVE PACE DATA!")
        return pd.read_csv(file_path)

    print("NEED TO GET OFFNESIVE AND DEFENSIVE PACE DATA!")
    # One frame per season, concatenated once at the end. DataFrame.append
    # copied the whole frame on every call and is gone in pandas >= 2.0.
    year_frames = []
    for year in range(2013, 2016):
        season_year = str(year) + "-" + str(year + 1)[2:4]
        print('YEAR => ' + season_year)

        # Advanced team data for each year: ORTG, DRTG and PACE.
        print('GETTING ADVANCED TEAM DATA')
        year_adv_df = get_general_stats("Team", "Advanced", "Totals",
                                        season_year, "Regular+Season")

        # SportsVU possession data for each year: offensive time of
        # possession.
        print('GETTING SPORTSVU POSSESSION DATA')
        year_svu_df = get_sports_vu_stats("Team", "Possessions", "Totals",
                                          season_year, "Regular+Season", '0')

        rows = []
        for _, team in year_adv_df.iterrows():
            print('GETTING POSSESSION TIMES FOR ' + team.TEAM_NAME)
            team_id = team.TEAM_ID

            # Look the team's possession row up once and reuse it.
            team_svu = year_svu_df.loc[year_svu_df['TEAM_ID'] == team_id]
            offensive_top = team_svu['TIME_OF_POSS'].values[0]
            print('OFFENSIVE TIME OF POSSESSION => ' + str(offensive_top))
            games = team_svu['GP'].values[0]

            # Defensive possession time is not given explicitly: pass the
            # team as the opponent id and sum the possession time of the
            # teams that played against it.
            opponents_svu_df = get_sports_vu_stats(
                "Team", "Possessions", "Totals", season_year,
                "Regular+Season", team_id)
            defensive_top = opponents_svu_df['TIME_OF_POSS'].sum()
            print('DEFENSIVE TIME OF POSSESSION =>' + str(defensive_top))

            rows.append([games, season_year, team_id, team.TEAM_NAME,
                         offensive_top, defensive_top, team.OFF_RATING,
                         team.DEF_RATING, team.PACE])

        year_frames.append(pd.DataFrame(rows, columns=columns))

    # No ignore_index: keeps the per-season 0..n-1 index the original
    # append chain produced, so the written CSV keeps the same index column.
    data_df = pd.concat(year_frames)

    data_df['TOTAL_POSSESSIONS'] = data_df['PACE'] * data_df['GP']
    data_df['OFF_PACE'] = data_df['OFF_TOP'] / data_df['TOTAL_POSSESSIONS']
    data_df['DEF_PACE'] = data_df['DEF_TOP'] / data_df['TOTAL_POSSESSIONS']
    data_df['DISPLAY'] = data_df['TEAM_NAME'] + " " + data_df["YEAR"]
    data_df.to_csv(file_path)
    return data_df
def chart_assists(data_df, year, num_of_players):
    """Draw assist charts for the season's leading assist-makers.

    Fetches total regular-season player stats for ``year``, keeps the
    ``num_of_players`` rows with the highest ``AST``, and for each of those
    players renders a scatter shot chart, a hexbin shot chart, a KDE shot
    chart and a histogram from that player's rows in ``data_df``.

    Args:
        data_df: Frame with a ``PLAYER2_ID`` column; rows are matched to a
            player through it (presumably the assisting player's id --
            confirm against the caller).
        year: Season identifier passed through to the stats getter and to
            every chart function.
        num_of_players: How many top assist-makers to chart.
    """
    season_type = data.SeasonTypes.REG
    all_players = data.get_general_stats(
        data.PlayerOrTeam.P,
        data.MeasureTypes.BASE,
        data.PerModes.TOTAL,
        year,
        season_type,
    )
    leaders = all_players.sort_values(by=["AST"], ascending=False)
    leaders = leaders.head(num_of_players)

    for _, leader in leaders.iterrows():
        leader_rows = data_df[data_df["PLAYER2_ID"] == leader.PLAYER_ID]

        # The three shot charts share one argument list.
        shot_chart_args = (
            leader_rows,
            leader.PLAYER_NAME,
            leader.PLAYER_ID,
            year,
            data.SeasonTypes.REG,
            "assist",
        )
        charts.make_matplot_scatter_shot_chart(*shot_chart_args)
        charts.make_matplot_hexbin_shot_chart(*shot_chart_args)
        charts.make_matplot_kde_shot_chart(*shot_chart_args)

        # The histogram takes no player id.
        charts.make_histogram(
            leader_rows, leader.PLAYER_NAME, year, data.SeasonTypes.REG, "assist"
        )
def get_data(overwrite):
    """Return per-player synergy play-type values, cached as a CSV.

    Creates ``../data/player_data/synergy_similarity`` when it is missing.
    If ``synergy_freqs.csv`` already exists there and ``overwrite`` is
    falsy, the cached file is read back. Otherwise the 2015-16 regular
    season player list is fetched, the ``Time`` column of every offensive
    synergy play type is left-joined onto it as ``<play type>_Frequency``,
    missing values are filled with 0, and the table is written to the CSV
    and returned.

    Args:
        overwrite: Rebuild the CSV even when a cached copy exists.

    Returns:
        pandas.DataFrame keyed by ``PLAYER_ID`` / ``PLAYER_NAME`` with one
        ``*_Frequency`` column per entry of ``SynergyPlayTypes``.
    """
    dir_path = "../data/player_data/synergy_similarity"
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    file_path = dir_path + "/synergy_freqs.csv"

    # Guard clause: serve the cache unless a rebuild was requested.
    if os.path.isfile(file_path) and not overwrite:
        return pd.read_csv(file_path)

    players = get_general_stats("player", "Base", "Totals", "2015-16", "Regular+Season")
    merged = players[["PLAYER_ID", "PLAYER_NAME"]]

    for stype in SynergyPlayTypes:
        play_type_df = get_synergy_stats("player", stype, "offensive")
        play_type_df = play_type_df[["PlayerIDSID", "Time"]]
        # Align the key column name with the player list before merging.
        play_type_df.columns = ["PLAYER_ID", stype + "_Frequency"]
        merged = merged.merge(play_type_df, on="PLAYER_ID", how="left")

    # Players without a row for a play type get 0 instead of NaN.
    merged = merged.fillna(0)
    merged.to_csv(file_path)
    return merged
def get_yearly_data(overwrite_file):
    """Build, or load from cache, the table of high-scoring player seasons.

    When ``../data/scoring_consistency/top_scorers.csv`` exists and
    ``overwrite_file`` is falsy, the cached CSV is read back and returned.
    Otherwise per-game player stats are fetched for every season starting
    in 1996 through 2015, reduced to players with ``GP >= 50`` and
    ``PTS >= 25``, labelled, concatenated, written to the CSV and returned.

    Args:
        overwrite_file: Rebuild the CSV even when a cached copy exists.

    Returns:
        pandas.DataFrame with the columns ``PLAYER_ID``, ``PLAYER_NAME``,
        ``TEAM_ID``, ``TEAM_ABBREVIATION``, ``GP``, ``PTS``, ``YEAR``,
        ``YEAR_DISPLAY`` and ``DISPLAY``. ``DISPLAY`` is the second
        whitespace-separated token of the player name followed by
        ``YEAR_DISPLAY``. Each season's rows keep the index they had in the
        fetched stats frame.
    """
    dir_path = '../data/scoring_consistency'
    file_path = dir_path + '/top_scorers.csv'

    if not os.path.exists(dir_path):
        print("MAKING DIRECTORY!")
        os.makedirs(dir_path)
    else:
        print("DIRECTORY EXISTS!")

    if os.path.isfile(file_path) and not overwrite_file:
        print("ALREADY HAVE TOP SCORERS FILE!")
        return pd.read_csv(file_path)

    print("NEED TO GET TOP SCORERS FILE")
    # Gather the per-season frames and concatenate once; DataFrame.append
    # no longer exists in pandas >= 2.0.
    year_frames = []
    for year in range(1996, 2016):
        year_string = str(year) + "-" + str(year + 1)[2:4]
        year_display = " '" + str(year + 1)[2:4]
        print("GETTING STATS FOR: " + year_string)

        year_df = get_general_stats("player", "Base", "PerGame", year_string,
                                    "Regular+Season")
        year_df = year_df[['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID',
                           'TEAM_ABBREVIATION', 'GP', 'PTS']]
        # Filter first and take an explicit copy, so the column assignments
        # below never write into a view of the fetched frame.
        qualifies = (year_df["GP"] >= 50) & (year_df["PTS"] >= 25)
        year_df = year_df.loc[qualifies].copy()

        year_df["YEAR"] = year_string
        year_df["YEAR_DISPLAY"] = year_display

        # Second name token, as before. A name with fewer than two tokens
        # falls back to the whole name instead of raising IndexError.
        name_tokens = [
            parts[1] if len(parts) > 1 else " ".join(parts)
            for parts in year_df["PLAYER_NAME"].str.split().tolist()
        ]
        year_df["DISPLAY"] = [token + year_display for token in name_tokens]

        year_frames.append(year_df)

    # No ignore_index: matches the index the original append chain kept.
    data_df = pd.concat(year_frames)
    data_df.to_csv(file_path)
    return data_df
import MySQLdb import data_getters as data import pandas as pd db = MySQLdb.connect(host="localhost", user="******", passwd="1qw2SQL3er4", db="nbadb") cur = db.cursor() df = pd.DataFrame() for year in range(1996, 2016): base_df = data.get_general_stats( data.PlayerOrTeam.P, data.MeasureTypes.BASE, data.PerModes.TOTAL, year, data.SeasonTypes.REG ) base_df["YEAR"] = data.get_year_string(year) base_df = base_df[ [ "PLAYER_ID", "TEAM_ID", "YEAR", "AGE", "PLAYER_NAME", "TEAM_ABBREVIATION", "GP", "MIN", "AST", "BLK", "DREB", "OREB", "FGA", "FGM", "FG3A",