Example #1
0
def build_csv(overwrite):
    """Build, or load from the CSV cache, the player-to-player passing table.

    When the cache file is missing or ``overwrite`` is truthy, the function
    walks the seasons starting in 2013, 2014 and 2015, fetches the player list
    with ``get_general_stats``, fetches each player's passing dashboard with
    ``get_player_passing_dashboard`` and emits one output row per dashboard
    row.  The result is sorted by ``Assists`` (descending), written to
    ``../data/p2p_assists/passing.csv`` and returned.  Otherwise the cached
    CSV is read and returned.

    Args:
        overwrite: When truthy, rebuild the table even if the cache exists.

    Returns:
        pandas.DataFrame with the columns ``Year``, ``Receiver_Id``,
        ``Receiver_Name``, ``Passer_Id``, ``Passer_Name``, ``Frequency``,
        ``Pass`` and ``Assists``.

    NOTE: the CSV is written with its index, so a frame loaded from the cache
    carries an extra unnamed index column that a freshly built frame lacks.
    """
    dir_path = '../data/p2p_assists'
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        print('MAKING DIRECTORY : ' + dir_path)
    file_path = dir_path + '/passing.csv'

    if os.path.isfile(file_path) and not overwrite:
        return pd.read_csv(file_path)

    columns = ['Year', 'Receiver_Id', 'Receiver_Name', 'Passer_Id', 'Passer_Name', 'Frequency', 'Pass',
               'Assists']
    # Rows are accumulated in a plain list and turned into a frame once:
    # DataFrame.append copied the whole frame per row and no longer exists in
    # pandas >= 2.0.
    rows = []
    for year in range(2013, 2016):
        year_string = get_year_string(year)
        base_df = get_general_stats(PlayerOrTeam.P, MeasureTypes.BASE, PerModes.TOTAL, year_string,
                                    SeasonTypes.REG)
        for _, receiver in base_df.iterrows():
            print(receiver.PLAYER_NAME)
            player_passing_df = get_player_passing_dashboard(receiver.PLAYER_ID, year_string)
            if player_passing_df is None:
                # No dashboard available for this player/season.
                continue
            for _, passer in player_passing_df.iterrows():
                rows.append([year, passer.PLAYER_ID, passer.PLAYER_NAME_LAST_FIRST,
                             passer.PASS_TEAMMATE_PLAYER_ID, passer.PASS_FROM, passer.FREQUENCY,
                             passer.PASS, passer.AST])

    passing_df = pd.DataFrame(rows, columns=columns)
    passing_df = passing_df.sort_values(by='Assists', ascending=False)
    passing_df.to_csv(file_path)
    return passing_df
def get_data(overwrite_file):
    """Build, or load from the CSV cache, per-team possession-time and pace data.

    When the cache file exists and ``overwrite_file`` is falsy the cached CSV
    is returned.  Otherwise, for each season starting in 2013, 2014 and 2015,
    the function combines advanced team stats (``OFF_RATING``, ``DEF_RATING``,
    ``PACE``) with possession-tracking stats (``GP``, ``TIME_OF_POSS``) and
    adds these derived columns:

    * ``TOTAL_POSSESSIONS`` = ``PACE`` * ``GP``
    * ``OFF_PACE`` = ``OFF_TOP`` / ``TOTAL_POSSESSIONS``
    * ``DEF_PACE`` = ``DEF_TOP`` / ``TOTAL_POSSESSIONS``
    * ``DISPLAY`` = ``TEAM_NAME`` + " " + ``YEAR``

    The result is written to ``../data/offensive_v_defensive_pace/data.csv``.

    Args:
        overwrite_file: When truthy, refetch even if the cache file exists.

    Returns:
        pandas.DataFrame with one row per team and season.

    Raises:
        IndexError: if a team from the advanced stats has no row in the
            possession-tracking stats for the same season.
    """
    dir_path = '../data/offensive_v_defensive_pace'
    file_path = dir_path + '/data.csv'
    if not os.path.exists(dir_path):
        print("MAKING DIRECTORY!")
        os.makedirs(dir_path)
    else:
        print("DIRECTORY EXISTS!")

    if os.path.isfile(file_path) and not overwrite_file:
        print("ALREADY HAVE OFFENSIVE AND DEFENSIVE PACE DATA!")
        return pd.read_csv(file_path)

    print("NEED TO GET OFFNESIVE AND DEFENSIVE PACE DATA!")
    columns = ['GP', 'YEAR', 'TEAM_ID', 'TEAM_NAME', 'OFF_TOP', 'DEF_TOP', 'ORTG', 'DRTG', 'PACE']
    year_frames = []
    for year in range(2013, 2016):
        season_year = str(year) + "-" + str(year + 1)[2:4]
        print('YEAR => ' + season_year)
        # get advanced team data for each year for ORTG, DRTG, and PACE
        print('GETTING ADVANCED TEAM DATA')
        year_adv_df = get_general_stats("Team", "Advanced", "Totals", season_year, "Regular+Season")
        # get sportsVU possession data for each year for Offensive Time of Possession
        print('GETTING SPORTSVU POSSESSION DATA')
        year_svu_df = get_sports_vu_stats("Team", "Possessions", "Totals", season_year, "Regular+Season", '0')
        # defensive possession time, not given explicitly but you can pass opponent id as a parameter and then sum
        # the possession time of teams playing against each team to calculate defensive possession time
        rows = []
        for _, team in year_adv_df.iterrows():
            print('GETTING POSSESSION TIMES FOR ' + team.TEAM_NAME)
            opp_id = team.TEAM_ID
            # Look the team up once; both GP and TIME_OF_POSS come from this row.
            team_svu = year_svu_df.loc[year_svu_df['TEAM_ID'] == opp_id]
            offensive_top = team_svu['TIME_OF_POSS'].values[0]
            print('OFFENSIVE TIME OF POSSESSION => ' + str(offensive_top))
            games = team_svu['GP'].values[0]
            team_year_svu_df = get_sports_vu_stats("Team", "Possessions", "Totals", season_year, "Regular+Season",
                                                   opp_id)
            defensive_top = team_year_svu_df['TIME_OF_POSS'].sum()
            print('DEFENSIVE TIME OF POSSESSION =>' + str(defensive_top))
            rows.append([games, season_year, opp_id, team.TEAM_NAME, offensive_top, defensive_top,
                         team.OFF_RATING, team.DEF_RATING, team.PACE])
        # One frame per season, built in a single step: DataFrame.append was
        # quadratic and no longer exists in pandas >= 2.0.
        year_frames.append(pd.DataFrame(rows, columns=columns))

    # Each season keeps its own 0..n-1 index, as the previous append-based
    # code did, so the layout of the written CSV is unchanged.
    data_df = pd.concat(year_frames)
    data_df['TOTAL_POSSESSIONS'] = data_df['PACE'] * data_df['GP']
    data_df['OFF_PACE'] = data_df['OFF_TOP'] / data_df['TOTAL_POSSESSIONS']
    data_df['DEF_PACE'] = data_df['DEF_TOP'] / data_df['TOTAL_POSSESSIONS']
    data_df['DISPLAY'] = data_df['TEAM_NAME'] + " " + data_df["YEAR"]
    data_df.to_csv(file_path)
    return data_df
def chart_assists(data_df, year, num_of_players):
    """Draw assist charts for the players with the most assists in a season.

    Fetches regular-season total stats for ``year`` through
    ``data.get_general_stats``, keeps the ``num_of_players`` rows with the
    highest ``AST`` and, for each of them, selects the rows of ``data_df``
    whose ``PLAYER2_ID`` equals that player's id.  The selection is passed to
    the scatter, hexbin and KDE shot-chart helpers and to the histogram helper
    in ``charts``, all tagged ``"assist"``.

    Args:
        data_df: Frame with a ``PLAYER2_ID`` column (presumably shot records
            with the assisting player's id -- confirm against the caller).
        year: Season identifier forwarded unchanged to ``data`` and ``charts``.
        num_of_players: How many top assisters to chart.
    """
    season_stats = data.get_general_stats(
        data.PlayerOrTeam.P, data.MeasureTypes.BASE, data.PerModes.TOTAL, year, data.SeasonTypes.REG
    )
    ranked_by_assists = season_stats.sort_values(by=["AST"], ascending=False)
    top_assisters = ranked_by_assists.head(num_of_players)

    for _, leader in top_assisters.iterrows():
        is_assisted_by_leader = data_df["PLAYER2_ID"] == leader.PLAYER_ID
        assisted_df = data_df[is_assisted_by_leader]

        # The three shot-chart helpers take the same positional arguments.
        shot_chart_args = (
            assisted_df, leader.PLAYER_NAME, leader.PLAYER_ID, year, data.SeasonTypes.REG, "assist"
        )
        charts.make_matplot_scatter_shot_chart(*shot_chart_args)
        charts.make_matplot_hexbin_shot_chart(*shot_chart_args)
        charts.make_matplot_kde_shot_chart(*shot_chart_args)

        # The histogram helper takes no player id.
        charts.make_histogram(assisted_df, leader.PLAYER_NAME, year, data.SeasonTypes.REG, "assist")
def get_data(overwrite):
    """Return per-player synergy play-type values, using a CSV cache.

    If ``../data/player_data/synergy_similarity/synergy_freqs.csv`` exists and
    ``overwrite`` is falsy, that file is read and returned.  Otherwise the
    2015-16 regular-season player list is fetched and, for every entry of
    ``SynergyPlayTypes``, the offensive synergy stats are left-merged onto it
    by player id: the ``Time`` column of each stats frame becomes
    ``<play type>_Frequency``.  Missing values are replaced with 0 before the
    table is written to the cache file and returned.

    Args:
        overwrite: When truthy, rebuild the table even if the cache exists.

    Returns:
        pandas.DataFrame with ``PLAYER_ID``, ``PLAYER_NAME`` and one
        ``*_Frequency`` column per play type.
    """
    dir_path = "../data/player_data/synergy_similarity"
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    file_path = "/".join((dir_path, "synergy_freqs.csv"))

    # Serve from the cache unless a rebuild was requested.
    if os.path.isfile(file_path) and not overwrite:
        return pd.read_csv(file_path)

    players_df = get_general_stats("player", "Base", "Totals", "2015-16", "Regular+Season")
    freqs_df = players_df[["PLAYER_ID", "PLAYER_NAME"]]
    for play_type in SynergyPlayTypes:
        play_type_df = get_synergy_stats("player", play_type, "offensive")
        play_type_freq = play_type_df[["PlayerIDSID", "Time"]].rename(
            columns={"PlayerIDSID": "PLAYER_ID", "Time": play_type + "_Frequency"}
        )
        # Left merge keeps every player, including those without this play type.
        freqs_df = freqs_df.merge(play_type_freq, on="PLAYER_ID", how="left")

    freqs_df = freqs_df.fillna(0)
    freqs_df.to_csv(file_path)
    return freqs_df
def get_yearly_data(overwrite_file):
    """Build, or load from the CSV cache, the table of high-volume scorers.

    When the cache file exists and ``overwrite_file`` is falsy the cached CSV
    is returned.  Otherwise per-game player stats are fetched for every season
    starting in 1996 through 2015, reduced to players with ``GP`` >= 50 and
    ``PTS`` >= 25, and annotated with:

    * ``YEAR``         -- season string such as ``"1996-97"``
    * ``YEAR_DISPLAY`` -- short label such as ``" '97"``
    * ``DISPLAY``      -- second whitespace-separated token of
      ``PLAYER_NAME`` (the whole name when there is only one token) followed
      by ``YEAR_DISPLAY``

    The combined table is written to
    ``../data/scoring_consistency/top_scorers.csv``.

    Args:
        overwrite_file: When truthy, refetch even if the cache file exists.

    Returns:
        pandas.DataFrame with one row per qualifying player and season.
    """
    dir_path = '../data/scoring_consistency'
    file_path = dir_path + '/top_scorers.csv'

    if not os.path.exists(dir_path):
        print("MAKING DIRECTORY!")
        os.makedirs(dir_path)
    else:
        print("DIRECTORY EXISTS!")

    if os.path.isfile(file_path) and not overwrite_file:
        print("ALREADY HAVE TOP SCORERS FILE!")
        return pd.read_csv(file_path)

    print("NEED TO GET TOP SCORERS FILE")
    year_frames = []
    for year in range(1996, 2016):
        year_string = str(year) + "-" + str(year + 1)[2:4]
        print("GETTING STATS FOR: " + year_string)

        year_df = get_general_stats("player", "Base", "PerGame", year_string, "Regular+Season")
        year_df = year_df[['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'GP', 'PTS']]

        # Filter first and take an explicit copy so the column assignments
        # below write to an independent frame instead of a slice.
        qualifies = (year_df["GP"] >= 50) & (year_df["PTS"] >= 25)
        year_df = year_df.loc[qualifies].copy()

        year_display = " '" + str(year + 1)[2:4]
        year_df["YEAR"] = year_string
        year_df["YEAR_DISPLAY"] = year_display

        # Second name token as before; single-token names fall back to the
        # whole name instead of raising IndexError.
        name_parts = year_df["PLAYER_NAME"].str.split().tolist()
        last_names = [parts[1] if len(parts) > 1 else " ".join(parts) for parts in name_parts]
        year_df["DISPLAY"] = [name + year_display for name in last_names]

        year_frames.append(year_df)

    # A single concat replaces the per-season DataFrame.append, which no
    # longer exists in pandas >= 2.0; row indexes are kept as fetched.
    data_df = pd.concat(year_frames)
    data_df.to_csv(file_path)
    return data_df
Example #6
0
import MySQLdb
import data_getters as data
import pandas as pd

# Open a connection to the "nbadb" MySQL database on localhost.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file kept out of version control.
db = MySQLdb.connect(host="localhost", user="******", passwd="1qw2SQL3er4", db="nbadb")
# Cursor on that connection.
cur = db.cursor()

# Starts out empty; presumably filled by the per-season loop below -- confirm.
df = pd.DataFrame()

for year in range(1996, 2016):
    base_df = data.get_general_stats(
        data.PlayerOrTeam.P, data.MeasureTypes.BASE, data.PerModes.TOTAL, year, data.SeasonTypes.REG
    )
    base_df["YEAR"] = data.get_year_string(year)
    base_df = base_df[
        [
            "PLAYER_ID",
            "TEAM_ID",
            "YEAR",
            "AGE",
            "PLAYER_NAME",
            "TEAM_ABBREVIATION",
            "GP",
            "MIN",
            "AST",
            "BLK",
            "DREB",
            "OREB",
            "FGA",
            "FGM",
            "FG3A",