Ejemplo n.º 1
0
def rozegrane_mecze_dzienne(data, sezon):
    """Return the home/away pairings and home-team results for one day.

    Queries the ``LeagueGameLog`` endpoint for regular-season games in
    season ``sezon`` with ``data`` used as both ends of the date range.
    The request uses the module-level ``headers`` object.

    Args:
        data: Date forwarded unchanged as ``date_from_nullable`` and
            ``date_to_nullable``.
        sezon: Season identifier forwarded unchanged as ``season``.

    Returns:
        A two-element list ``[pairings, results]``. ``pairings`` maps each
        home team's ``TEAM_NAME`` to the visiting team's ``TEAM_NAME``;
        ``results`` holds the home team's ``WL`` value for every paired
        game, in the order the games were added to ``pairings``.
    """
    # Fetch the list of game-log rows for the requested date.
    mecze_dzienne = leaguegamelog.LeagueGameLog(
        season=sezon,
        league_id='00',
        season_type_all_star='Regular Season',
        date_from_nullable=data,
        date_to_nullable=data,
        headers=headers,
        timeout=60)
    wiersze = mecze_dzienne.get_normalized_dict()['LeagueGameLog']

    # Pair the rows by GAME_ID instead of by list position: positional
    # pairing raised IndexError on an odd number of rows and silently
    # mixed up games whenever the two rows of a game were not adjacent.
    wiersze_wg_meczu = {}
    for wiersz in wiersze:
        wiersze_wg_meczu.setdefault(wiersz['GAME_ID'], []).append(wiersz)

    mecze_dla_gospodarzy_slownik = {}
    wyniki_meczy_lista = []
    for para in wiersze_wg_meczu.values():
        if len(para) != 2:
            # Without both rows the opponent cannot be determined.
            continue
        pierwszy, drugi = para
        # '@' in MATCHUP marks the row of the visiting team.
        if '@' in pierwszy['MATCHUP']:
            gosc, gospodarz = pierwszy, drugi
        else:
            gosc, gospodarz = drugi, pierwszy

        mecze_dla_gospodarzy_slownik[gospodarz['TEAM_NAME']] = gosc['TEAM_NAME']
        wyniki_meczy_lista.append(gospodarz['WL'])

    return [mecze_dla_gospodarzy_slownik, wyniki_meczy_lista]
Ejemplo n.º 2
0
def get_league_gamelogs(season_start_year):
    """Build one ``GameOverview`` per game of the given season.

    The league game log is requested for the date range returned by
    ``get_season_start_and_end_dates`` and passed through
    ``reformat_response``. Only the first row seen for each ``GAME_ID`` is
    used; later rows with the same id are ignored.

    Args:
        season_start_year: Starting year of the season, forwarded to the
            season/date helper functions.

    Returns:
        A list of ``GameOverview`` objects built from the game id, game
        date, home team id, away team id and a flag telling whether the
        home team won.
    """
    abbrev_to_id = get_team_abbrev_to_id_map()
    first_day, last_day = get_season_start_and_end_dates(season_start_year)
    endpoint = leaguegamelog.LeagueGameLog(
        season=start_year_to_season(season_start_year),
        date_from_nullable=first_day,
        date_to_nullable=last_day)
    rows = reformat_response(endpoint.league_game_log.get_dict())

    overviews = []
    handled_ids = set()
    for row in rows:
        game_id = row['GAME_ID']
        if game_id in handled_ids:
            continue
        handled_ids.add(game_id)

        home_abbrev, away_abbrev = get_teams_by_matchup_str(row['MATCHUP'])
        # The home team won when this row belongs to the home team and is a
        # win, or belongs to the away team and is not a win.
        row_is_home = home_abbrev == row['TEAM_ABBREVIATION']
        row_is_win = row['WL'] == 'W'
        overviews.append(
            GameOverview(
                game_id,
                row['GAME_DATE'],
                abbrev_to_id[home_abbrev],
                abbrev_to_id[away_abbrev],
                row_is_home == row_is_win,
            )
        )
    return overviews
Ejemplo n.º 3
0
def getGameData(dateFrom):
    """Insert or complete ``NBA_game`` rows from the league game log.

    Every row of the game log is looked up in ``NBA_game`` by game id.
    When no row exists yet, one is inserted with the current team stored as
    home or away team, decided by the character at index 4 of ``MATCHUP``
    ('@' -> away, 'v' -> home). When a row exists, whichever of
    ``homeTeamID`` / ``awayTeamID`` is still ``None`` is filled with the
    current team, unless that team is already stored in the other column.

    Args:
        dateFrom: Passed as ``date_from_nullable``; an empty string requests
            the endpoint without a date filter.

    Raises:
        ValueError: If the matchup string marks neither home nor away.
    """
    # NOTE(review): the SQL is assembled by string formatting; the helpers'
    # signatures are not visible here, so parameter binding was not applied.
    request_kwargs = {} if dateFrom == '' else {'date_from_nullable': dateFrom}
    frame = leaguegamelog.LeagueGameLog(**request_kwargs).get_data_frames()[0]

    for _, row in frame.iterrows():
        existing = getSQLDataInDF("SELECT * from NBA_game where gameID = '" + row['GAME_ID'] + "'")

        if existing.empty:
            # First time this game is seen: store it with one team id only.
            side_marker = row['MATCHUP'][4]
            if side_marker == '@':
                team_column = 'awayTeamID'
            elif side_marker == 'v':
                team_column = 'homeTeamID'
            else:
                raise ValueError("Error: can't determine home or away from matchup")

            runAndCommitSQL("insert into NBA_game (gameID, " + team_column + ", gameDate, seasonID) values ('%s', '%s', '%s', '%s');"
            % (row['GAME_ID'], row["TEAM_ID"], row["GAME_DATE"], row["SEASON_ID"]))
            continue

        # The game already exists: fill in whichever team id is missing.
        if existing.iloc[0]['homeTeamID'] is None:
            if existing.iloc[0]['awayTeamID'] != str(row["TEAM_ID"]):
                runAndCommitSQL("update NBA_game set homeTeamID = '%s' where gameID = '%s'; "
                % (row["TEAM_ID"], row['GAME_ID']))
        elif existing.iloc[0]['awayTeamID'] is None:
            if existing.iloc[0]['homeTeamID'] != str(row["TEAM_ID"]):
                runAndCommitSQL("update NBA_game set awayTeamID = '%s' where gameID = '%s'; "
                % (row["TEAM_ID"], row['GAME_ID']))
        else:
            print('neither team for gameID is null')
Ejemplo n.º 4
0
def writeSeasonToFile(season):
    """Write the league game log of ``season`` to ``data/season-<season>.csv``.

    The file is placed in the ``data`` directory next to this module. If it
    already exists nothing is downloaded or written.

    Args:
        season: Season identifier; used in the file name and forwarded as
            ``season_all_time`` to ``LeagueGameLog``.

    Returns:
        Always ``True``.

    Raises:
        IndexError: If the game log contains no rows. The CSV header is
            taken from the first row, so this is raised before the file is
            created and no empty file is left behind.
    """
    full_filepath = os.path.join(os.path.dirname(__file__), 'data',
                                 'season-{}.csv'.format(season))
    if os.path.exists(full_filepath):
        return True

    league_gamelog = leaguegamelog.LeagueGameLog(season_all_time=season)
    games = league_gamelog.get_normalized_dict()['LeagueGameLog']
    print(games)

    # Resolve the header first: an empty log must not leave behind an empty
    # file that would make later calls skip the download.
    fieldnames = list(games[0].keys())

    # The context manager guarantees the handle is flushed and closed.
    with open(full_filepath, 'w', newline='') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames)
        csv_writer.writeheader()
        csv_writer.writerows(games)

    return True
Ejemplo n.º 5
0
def getGamesData(season):
    """Append selected fields of every 'LAL' row to ``seasonDataList``.

    ``seasonDataList`` is not defined in this function and must exist in
    the enclosing scope. Nothing is returned.

    Args:
        season: Season identifier forwarded to ``LeagueGameLog``.
    """
    from nba_api.stats.endpoints import leaguegamelog

    # Raw rows of the season's game log (lists addressed by position).
    rows = leaguegamelog.LeagueGameLog(
        season=season).league_game_log.get_dict()['data']

    for row in rows:
        # Column 2 is matched against 'LAL' - presumably the team
        # abbreviation; confirm against the endpoint's header list.
        if row[2] != 'LAL':
            continue
        # Keep column 4 without its first two characters plus columns 5-7.
        seasonDataList.append([row[4][2:], row[5], row[6], row[7]])
Ejemplo n.º 6
0
def dailyMatchups(date, season, past=True):
    """Return the home/away pairings (and optionally results) for one day.

    Queries the ``LeagueGameLog`` endpoint for regular-season games in
    ``season`` with ``date`` used as both ends of the date range. The
    request uses the module-level ``customHeaders`` object.

    Args:
        date: Date forwarded unchanged as ``date_from_nullable`` and
            ``date_to_nullable``.
        season: Season identifier forwarded unchanged as ``season``.
        past (bool): When true, the win/loss results are returned together
            with the pairings; otherwise only the pairings are returned.

    Returns:
        If ``past`` is true, a two-element list ``[pairings, results]``
        where ``pairings`` maps each home team's ``TEAM_NAME`` to the
        visiting team's ``TEAM_NAME`` and ``results`` holds the home team's
        ``WL`` value per paired game, in insertion order. Otherwise only the
        ``pairings`` dict.
    """
    # Obtain the game-log rows of all teams that played on the given date.
    gameLog = leaguegamelog.LeagueGameLog(
        season=season,
        league_id='00',
        season_type_all_star='Regular Season',
        date_from_nullable=date,
        date_to_nullable=date,
        headers=customHeaders,
        timeout=60)
    listOfTeams = gameLog.get_normalized_dict()['LeagueGameLog']

    # Pair the rows by GAME_ID instead of by list position: positional
    # pairing raised IndexError on an odd number of rows and silently
    # mixed up games whenever the two rows of a game were not adjacent.
    rowsByGame = {}
    for teamRow in listOfTeams:
        rowsByGame.setdefault(teamRow['GAME_ID'], []).append(teamRow)

    winLossList = []
    homeAwayDict = {}
    for pair in rowsByGame.values():
        if len(pair) != 2:
            # Without both rows the opponent cannot be determined.
            continue
        first, second = pair
        # '@' in MATCHUP indicates that the row belongs to the away team.
        if '@' in first['MATCHUP']:
            awayRow, homeRow = first, second
        else:
            awayRow, homeRow = second, first

        winLossList.append(homeRow['WL'])
        homeAwayDict[homeRow['TEAM_NAME']] = awayRow['TEAM_NAME']

    if past:
        return [homeAwayDict, winLossList]
    return homeAwayDict
Ejemplo n.º 7
0
def create_cat_list(season, cat):
    """Download a season's league game log and extract one column.

    Args:
        season: Season identifier forwarded to ``LeagueGameLog``.
        cat: Name of the column to extract.

    Returns:
        A tuple ``(league_gamelog, cat_list)``: the game log as a
        ``DataFrame`` whose columns are the result set's headers, and the
        values of column ``cat`` as a list.

    Raises:
        KeyError: If ``cat`` is not one of the result set's headers.
    """
    response_gamelog = lgl.LeagueGameLog(season=season)

    # Short pause after the request - presumably to throttle consecutive
    # calls to the stats API.
    time.sleep(0.5)

    result_set = json.loads(response_gamelog.get_json())['resultSets'][0]

    # Passing the headers at construction time also works for an empty
    # rowSet; assigning ``columns`` afterwards raised a length-mismatch
    # ValueError in that case.
    league_gamelog = pd.DataFrame(result_set['rowSet'],
                                  columns=result_set['headers'])

    cat_list = league_gamelog[cat].tolist()

    return league_gamelog, cat_list
Ejemplo n.º 8
0
def get_tsa_requirement():
    """Return the true-shooting-attempt qualifier scaled to games played.

    True shooting attempts are defined as ``TSA = FGA + 0.44 * FTA``. The
    full-season requirement (``TSA_REQUIREMENT_NORMAL_SEASON``, stated for
    82 games) is first rescaled to ``CURRENT_SEASON_NUM_GAMES`` and then
    multiplied by the share of the season's game-log rows that exist so far.

    Returns:
        The prorated TSA requirement as a number.
    """
    season_label = get_current_season_full()
    gamelog_rows = leaguegamelog.LeagueGameLog(
        season=season_label).get_normalized_dict()["LeagueGameLog"]

    # Requirement for a complete season of the current length.
    full_requirement = (TSA_REQUIREMENT_NORMAL_SEASON / 82 *
                        CURRENT_SEASON_NUM_GAMES)

    # Rows present so far relative to games-per-team times number of teams.
    expected_rows = CURRENT_SEASON_NUM_GAMES * CURRENT_SEASON_NUM_TEAMS
    played_fraction = len(gamelog_rows) / expected_rows

    return played_fraction * full_requirement
Ejemplo n.º 9
0
import pandas as pd
import json
from nba_api.stats.endpoints import leaguegamelog
import time
import numpy as np

# Season whose team game log is downloaded.
season = '2020-21'

# 'T' requests the log per team.
response = leaguegamelog.LeagueGameLog(
    season=season,
    player_or_team_abbreviation='T',
)

time.sleep(0.1)

content = json.loads(response.get_json())

# Turn the first result set into a DataFrame labelled with its headers.
teamgamelog_raw = content['resultSets'][0]
headers, rows = teamgamelog_raw['headers'], teamgamelog_raw['rowSet']

teamgamelog_df = pd.DataFrame(rows)
teamgamelog_df.columns = headers

# Each game occupies two rows of the log.
game_rows = 2
Ejemplo n.º 10
0
def create_movingAvgPlayer_df(start_year, end_year, stat, plot_flag):
    """Measure how late in a season players' running averages settle.

    For every season returned by ``year_range_func.create_yearRange_list``
    the player game log is downloaded. Players with at least 50 logged
    games and at least 500 summed minutes are kept. For each of them the
    running average of ``stat`` is compared, game by game, with the
    season-long average; the game following the last one whose running
    average lay outside ``[floor(final), ceil(final))`` is stored as
    "No Change Game", and its ratio to games played as ``rat_season``.

    The per-season tables are concatenated, split into four groups at the
    25th/50th/75th percentiles of "Final Average", and the mean and
    standard deviation of ``rat_season`` are printed for each group.

    Args:
        start_year: Forwarded to ``create_yearRange_list``.
        end_year: Forwarded to ``create_yearRange_list``.
        stat: Name of the game-log column to analyse.
        plot_flag: When equal to 1, histograms of ``rat_season`` are shown.

    Returns:
        The per-player table of the last season processed.
        NOTE(review): the concatenated table ``players_cond_all`` is not
        returned, and figure 2 also plots only the last season - confirm
        that this is intended.

    Raises:
        IndexError: If a qualifying player has no game outside the bounds.
    """
    players_cond_all_list = []

    year_range = year_range_func.create_yearRange_list(start_year, end_year)

    # Qualification bounds for games played and minutes played in a season.
    game_min = 50
    mins_min = game_min * 10

    for year in year_range:

        print(year)

        log_raw = leaguegamelog.LeagueGameLog(
            player_or_team_abbreviation='P',
            season=year,
            sorter='DATE',
        )

        time.sleep(1)
        content = json.loads(log_raw.get_json())
        results = content['resultSets'][0]

        # Headers are passed at construction so an empty rowSet does not
        # trigger a length-mismatch error.
        log = pd.DataFrame(results['rowSet'], columns=results['headers'])

        # Sum the minutes and count the logged games of every player.
        # The string 'sum' replaces the builtin, which pandas deprecates as
        # an aggregation callable.
        log_mins_gp = log.groupby(['PLAYER_NAME']).agg({
            'MIN': 'sum',
            'PLAYER_NAME': 'count'
        })

        # Filter by minutes first, then by games played.
        log_cond = log_mins_gp[log_mins_gp['MIN'] >= mins_min]
        log_cond = log_cond[log_cond['PLAYER_NAME'] >= game_min]
        log_cond.columns = ['MINS', 'GP']

        pd.set_option("display.max_rows", None, "display.max_columns", None)

        # Names of the players that satisfy both conditions.
        players_cond = log_cond.index.tolist()

        # Keep only the rows of qualifying players.
        log_stat = log[log['PLAYER_NAME'].isin(players_cond)]

        # Drop every column except the player name and the analysed stat.
        # ``axis`` must be passed by keyword: pandas 2.x no longer accepts
        # it positionally.
        log_stat = log_stat.drop(
            log_stat.columns.difference(['PLAYER_NAME', stat]), axis=1)
        log_stat.reset_index(drop=True, inplace=True)

        # Season-long average of the stat per player.
        average_stat = log_stat.groupby('PLAYER_NAME')[stat].mean()

        # Running average after each game of a player.
        log_stat['Average'] = log_stat.groupby('PLAYER_NAME')[stat].cumsum(
        ) / (log_stat.groupby('PLAYER_NAME')[stat].cumcount() + 1)

        # Repeat each player's final average on all of that player's rows.
        log_stat['Final Average'] = [
            average_stat.loc[player] for player in log_stat['PLAYER_NAME']
        ]

        # 1 when the running average lies in [floor(final), ceil(final)).
        log_stat['Within bounds?'] = np.where(
            (log_stat['Average'] >= np.floor(log_stat['Final Average']))
            & (log_stat['Average'] < np.ceil(log_stat['Final Average'])), 1, 0)

        # Running count of games per player.
        log_stat['Cum Count'] = log_stat.groupby(
            'PLAYER_NAME')[stat].cumcount() + 1

        # One row per qualifying player.
        players_cond = log_stat[['PLAYER_NAME']].copy()
        players_cond.drop_duplicates(inplace=True)

        # Row position of each player's last out-of-bounds game. Positions
        # equal index labels because the index was reset above.
        players_cond['Final 0 Index'] = [
            np.where((log_stat['Within bounds?'] == 0)
                     & (log_stat['PLAYER_NAME'] == player))[0][-1]
            for player in players_cond['PLAYER_NAME']
        ]

        # Game number following the last out-of-bounds game, and the final
        # average stored on that row.
        players_cond['No Change Game'] = [
            log_stat['Cum Count'][idx] + 1
            for idx in players_cond['Final 0 Index']
        ]
        players_cond['Final Average'] = [
            log_stat['Final Average'][idx]
            for idx in players_cond['Final 0 Index']
        ]

        # Games played per player, then reset the index.
        players_cond['GP'] = [
            log_cond.loc[player, 'GP']
            for player in players_cond['PLAYER_NAME']
        ]
        players_cond.reset_index(drop=True, inplace=True)

        # Share of the player's season at which the average settled.
        players_cond[
            'rat_season'] = players_cond['No Change Game'] / players_cond['GP']

        players_cond_all_list.append(players_cond)

    players_cond_all = pd.concat(players_cond_all_list)
    players_cond_all.reset_index(drop=False, inplace=True)

    p25 = np.percentile(players_cond_all['Final Average'], 25)
    p50 = np.percentile(players_cond_all['Final Average'], 50)
    p75 = np.percentile(players_cond_all['Final Average'], 75)

    players_p25 = players_cond_all[players_cond_all['Final Average'] < p25]
    players_p50 = players_cond_all[(players_cond_all['Final Average'] >= p25)
                                   & (players_cond_all['Final Average'] < p50)]
    players_p75 = players_cond_all[(players_cond_all['Final Average'] >= p50)
                                   & (players_cond_all['Final Average'] < p75)]
    players_p100 = players_cond_all[players_cond_all['Final Average'] >= p75]

    p25_avg = players_p25['rat_season'].mean()
    p50_avg = players_p50['rat_season'].mean()
    p75_avg = players_p75['rat_season'].mean()
    p100_avg = players_p100['rat_season'].mean()

    p25_stdev = stdev(players_p25['rat_season'])
    p50_stdev = stdev(players_p50['rat_season'])
    p75_stdev = stdev(players_p75['rat_season'])
    p100_stdev = stdev(players_p100['rat_season'])

    print('Average ratio:\n', p25_avg, '\n', p50_avg, '\n', p75_avg, '\n',
          p100_avg, '\n')
    print('Standard deviation:\n', p25_stdev, '\n', p50_stdev, '\n', p75_stdev,
          '\n', p100_stdev, '\n')

    if plot_flag == 1:

        plt.figure(1)

        plt.subplot(2, 2, 1)
        players_p25['rat_season'].hist()
        plt.title('Percentile 25')

        plt.subplot(2, 2, 2)
        players_p50['rat_season'].hist()
        plt.title('Percentile 50')

        plt.subplot(2, 2, 3)
        players_p75['rat_season'].hist()
        plt.title('Percentile 75')

        plt.subplot(2, 2, 4)
        players_p100['rat_season'].hist()
        plt.title('Percentile 100')

        plt.figure(2)
        players_cond['rat_season'].hist()

        plt.show()

    return players_cond
Ejemplo n.º 11
0
    def __init__(self, season=datetime.datetime.today().year - 1):
        '''
        Season object combining the basketball-reference.com schedule with
        game logs from the LeagueGameLog endpoint.

        Parameters:

        season (int, default: year at import time - 1)
            The starting year of the season to pull data for.
                Ex. 2008
            Anything that cannot be converted with int() raises TypeError.
            Note that the default is evaluated once, when the class body is
            executed, not on every call.

        Attributes:
            self.season - starting year as int
            self.season_str - e.g. "2008-09"
            self.games - df of all games read from basketball-reference
            self.league - df built from teams.get_teams()
            self.playoff_start - index of the first row of self.games whose
                'Date' equals 'playoffs', or None when there is none
            self.game_log / self.playoffs - team game logs for the regular
                season / playoffs
            self.reg_player_scoring / self.po_player_scoring - player game
                logs for the regular season / playoffs

        Raises:
            TypeError: if season cannot be converted to an integer.
        '''

        try:
            season = int(season)
        except (TypeError, ValueError, OverflowError) as err:
            # - Most likely a non-numeric string was passed for season
            raise TypeError(
                "Wrong variable type for season. Integer expected.") from err
        self.season = season
        self.season_str = str(season) + "-" + str(season + 1)[2:]

        # - basketball-reference names a season by its second year
        bref_season = self.season + 1

        # - Lower-case month names, October..December then January..September.
        #   The year 2019 only serves to format month names.
        months = [
            datetime.date(2019, month_no, 1).strftime('%B').lower()
            for month_no in list(range(10, 13)) + list(range(1, 10))
        ]

        # - One schedule page per month
        urls = [
            'https://www.basketball-reference.com/leagues/NBA_' +
            str(bref_season) + '_games-' + str(month_name) + '.html'
            for month_name in months
        ]

        monthly_frames = []
        for url in urls:
            try:
                month = pd.read_html(url)[0]
                month.drop(['Notes', 'Unnamed: 6'], axis=1, inplace=True)
                month.dropna(subset=['PTS'], inplace=True)
            except Exception:
                # - Best effort: a month whose page is missing or has an
                #   unexpected layout is skipped. Unlike a bare except this
                #   lets KeyboardInterrupt/SystemExit through.
                continue
            monthly_frames.append(month)

        # - Concatenate once instead of growing a frame inside the loop
        if monthly_frames:
            games = pd.concat(monthly_frames, sort=False)
        else:
            games = pd.DataFrame()

        # - Reset the index and rename the overtime column
        games.reset_index(inplace=True, drop=True)
        games.rename(columns={'Unnamed: 7': 'OT'}, inplace=True)

        self.games = games
        self.league = pd.DataFrame(teams.get_teams())

        try:
            self.playoff_start = self.games[self.games['Date'] ==
                                            'playoffs'].index[0]
        except (KeyError, IndexError):
            # - No 'Date' column (nothing was read) or no playoffs marker row
            self.playoff_start = None

        # - Same request four times, varying only the scope ('T' team,
        #   'P' player) and the season type; order of requests is unchanged.
        log_requests = (
            ('game_log', 'T', 'Regular Season'),
            ('playoffs', 'T', 'Playoffs'),
            ('reg_player_scoring', 'P', 'Regular Season'),
            ('po_player_scoring', 'P', 'Playoffs'),
        )
        for attribute, scope, season_type in log_requests:
            log = leaguegamelog.LeagueGameLog(
                counter=0,
                direction='ASC',
                league_id='00',
                player_or_team_abbreviation=scope,
                season=self.season_str,
                season_type_all_star=season_type)
            setattr(self, attribute, log.get_data_frames()[0])
Ejemplo n.º 12
0
from nba_api.stats.endpoints import leaguegamelog
import pickle
import time

# For every season "1946-47" through "1998-99": download the league game log
# twice (default season type, then playoffs), pickle each response's result
# sets, and build a text summary from the first pickled file.
# NOTE(review): both pickle files use fixed names, so each season overwrites
# the files written for the previous one - confirm this is intended.
for i in range(46, 99):
    year = i
    season = "19{}-{}".format(year, year + 1)

    # No season type is given here - presumably the endpoint's default is
    # the regular season, matching the file name; confirm.
    logs = leaguegamelog.LeagueGameLog(season=season)  # season="ALLTIME"
    datadict = logs.get_dict()
    with open("regular.pickle", "wb") as f:
        pickle.dump(datadict["resultSets"], f, pickle.HIGHEST_PROTOCOL)

    logs = leaguegamelog.LeagueGameLog(season=season,
                                       season_type_all_star="Playoffs")
    datadict = logs.get_dict()
    with open("playoff.pickle", "wb") as f:
        pickle.dump(datadict["resultSets"], f, pickle.HIGHEST_PROTOCOL)

    # Read back the file that was just written above.
    with open("regular.pickle", "rb") as f:
        data = pickle.load(f)

    tmp = data[0]["rowSet"]
    info = "============ Start of Season ============\n"

    # NOTE(review): this loop rebinds ``i``, the outer loop variable. The
    # outer ``for`` reassigns it on the next iteration, so iteration itself
    # is unaffected, but ``i`` no longer holds the year after this point.
    for i in tmp:
        # Only rows whose column 7 equals "W" are summarised - presumably
        # the win/loss column; confirm against the result set's headers.
        if i[7] == "W":
            info += i[5]
            info += " | "
            # First and last whitespace-separated token of column 6.
            info += "w: {} | ".format(i[6].split()[0])
            info += "L: {}\n".format(i[6].split()[-1])
Ejemplo n.º 13
0
def games_request(season_string, bb_date, season_type):
    """Return the raw game-log rows for a single date.

    Args:
        season_string: Season identifier forwarded as ``season``.
        bb_date: Date used as both ``date_from_nullable`` and
            ``date_to_nullable``.
        season_type: Forwarded as ``season_type_all_star``.

    Returns:
        The ``rowSet`` of the first result set of the endpoint's response.
    """
    endpoint = leaguegamelog.LeagueGameLog(
        season=season_string,
        date_from_nullable=bb_date,
        date_to_nullable=bb_date,
        season_type_all_star=season_type)
    first_result_set = endpoint.get_dict()['resultSets'][0]
    return first_result_set['rowSet']
Ejemplo n.º 14
0
# Fetch the team records. Note that this rebinds the name ``teams`` from the
# object providing ``get_teams`` to the returned collection.
teams = teams.get_teams()

# Parallel lists of every team's id and abbreviation.
team_ids = [team['id'] for team in teams]
team_abbrev = [team['abbreviation'] for team in teams]

# Season whose games are exported.
season = '2018-19'

# Download that season's game log as a DataFrame.
grab_game_data = leaguegamelog.LeagueGameLog(season_all_time=season)
game_data = grab_game_data.get_data_frames()[0]

# Write the DataFrame to CSV for later visualization.
file_name = '2018_19_games_data.csv'
game_data.to_csv(file_name)
Ejemplo n.º 15
0
import numpy as np
import pandas as pd

from nba_api.stats.endpoints import boxscoreplayertrackv2
from nba_api.stats.endpoints import leaguegamelog
import time

# Time the whole download.
start = time.time()

# Unique game ids found in the 2019-20 league game log.
games = leaguegamelog.LeagueGameLog(season='2019-20').get_data_frames()[0]
games = games['GAME_ID'].unique()

# Collect one player-tracking frame per game and concatenate once at the
# end: ``DataFrame.append`` was removed in pandas 2.0 and copied the whole
# accumulated frame on every iteration.
frames = []
for index in games:
    raw = boxscoreplayertrackv2.BoxScorePlayerTrackV2(index).get_data_frames()[0]
    frames.append(raw)

# ``pd.concat`` rejects an empty list, so fall back to an empty frame.
df = pd.concat(frames) if frames else pd.DataFrame()

df.to_csv('cc.csv')
print("--- %s seconds ---" % (time.time() - start))
Ejemplo n.º 16
0
    dictgamelog = gamelog.league_game_log.get_dict()
        
    #print(dictgamelog['headers'])
    count = 0
    acount =0
    justdata = dictgamelog['data']
    for game in justdata:
        gameList.append(game)

"""

from nba_api.stats.endpoints import leaguegamelog
seasons = ['2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019']
gameList = []
for season in seasons:
    gamelog = leaguegamelog.LeagueGameLog(season=season)
    #get dictionary of game log for season interested in
    dictgamelog = gamelog.league_game_log.get_dict()

    #print(dictgamelog['headers'])
    count = 0
    acount = 0
    print(dictgamelog['headers'])
    justdata = dictgamelog['data']
    for game in justdata:
        if game[6][4:7] == 'vs.':
            gameList.append(game)
"""

with open('games_details.csv','r') as details_file:
    ca =0
Ejemplo n.º 17
0
def create_playergamelog_cum_df(player, stat, year='2020-21'):
    """Plot a player's per-game values and their 10-game rolling means.

    The season's player game log is downloaded and reduced to the rows of
    ``player``. For every requested stat the per-game values are drawn as
    dots, followed by one line per stat showing the rolling mean over up to
    10 games.

    Args:
        player: Value matched exactly against the ``PLAYER_NAME`` column.
        stat: A column name or a sequence of column names to plot. Any
            number of stats is accepted.
        year: Season identifier forwarded to ``LeagueGameLog``.

    Returns:
        The player's rows of the game log with a fresh 0-based index.

    Raises:
        KeyError: If a requested stat is not a column of the game log.
    """
    log_raw = leaguegamelog.LeagueGameLog(player_or_team_abbreviation='P',
                                          season=year,
                                          sorter='DATE')

    time.sleep(0.5)

    results = json.loads(log_raw.get_json())['resultSets'][0]

    # Headers are passed at construction so an empty rowSet does not
    # trigger a length-mismatch error.
    log = pd.DataFrame(results['rowSet'], columns=results['headers'])

    log_player = log[log['PLAYER_NAME'] == player].copy()
    log_player.reset_index(inplace=True, drop=True)

    # Accept a single column name as well as a sequence of names.
    stats = [stat] if isinstance(stat, str) else list(stat)

    # One rolling-mean series per stat. The previous ``[[]] * len(stat)``
    # created several references to one shared list, and the plotting code
    # only worked because of that aliasing.
    sma_list = [
        log_player[name].rolling(10, min_periods=1).mean() for name in stats
    ]

    fig_1, ax_1 = plt.subplots()

    # Raw per-game values first, rolling means afterwards, so the drawing
    # order matches the previous two-stat behaviour.
    for name in stats:
        ax_1.plot(log_player[name], '.', label=name)
    for name, sma in zip(stats, sma_list):
        ax_1.plot(sma, label=name)

    ax_1.set_facecolor('black')
    ax_1.set_title(player + ', ' + year)
    ax_1.legend()

    plt.show()

    return log_player
Ejemplo n.º 18
0
def create_raw_season_games_df():
    """Build the raw per-team, per-game dataset and write it to disk.

    For every season returned by ``data.load_seasons()`` the regular-season
    team game log is downloaded and extended with:

    * ``<STAT>_AGAINST`` columns computed by ``against`` for each row;
    * ``W_L`` (1 for a win, -1 otherwise) and ``G_PLAYED`` (running game
      count per team);
    * ``*_CUM`` columns: per-team expanding sums, shifted by one game;
    * ``*_CUM_L10`` columns: per-team rolling sums over up to 10 games,
      shifted by one game.

    After all seasons are concatenated, ``SEASON``, ``LOCATION`` and
    ``UNIQUE_MATCHUP`` are added, followed by rolling means
    (``*_MEAN_ML10``) and rolling sums (``*_CUM_ML10``) per team and
    matchup. Duplicate columns are removed and the result is written to
    ``config.RAW_SEASON_GAMES_DS`` (feather) and
    ``config.RAW_SEASON_GAMES_DS_CSV`` (CSV). Nothing is returned.

    Raises:
        ValueError: If there are no seasons to concatenate.
    """
    print("Create raw season games dataset. ")
    seasons = data.load_seasons().SEASON.unique()
    print("seasons: ", len(seasons))
    print(seasons)

    # Stats for which an opponent ("against") column is derived, in the
    # order the columns are created.
    against_stats = ('PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
                     'REB', 'AST', 'STL', 'BLK', 'TOV', 'PLUS_MINUS')

    # Columns accumulated by the expanding / rolling sums.
    sum_columns = [
        'W_L', 'PTS', 'PTS_AGAINST',
        'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
        'FGM_AGAINST', 'FGA_AGAINST', 'FG3M_AGAINST',
        'FG3A_AGAINST', 'FTM_AGAINST', 'FTA_AGAINST',
    ]

    # Columns averaged per team and matchup.
    mean_columns = [
        'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
        'REB', 'AST', 'STL', 'BLK', 'TOV', 'PTS', 'PLUS_MINUS',
        'REB_AGAINST', 'AST_AGAINST', 'STL_AGAINST',
        'BLK_AGAINST', 'TOV_AGAINST', 'PTS_AGAINST',
    ]

    season_frames = []
    for season in seasons:
        season_games = leaguegamelog.LeagueGameLog(season_type_all_star="Regular Season"
                                                   , season=season).get_data_frames()[0] \
            .set_index('GAME_ID').sort_values(by=['GAME_DATE'])

        season_games.dropna(inplace=True)
        season_games.drop(columns=['VIDEO_AVAILABLE'], inplace=True)

        # Every "<STAT>_AGAINST" column is derived from the same-named stat.
        # Previously FTM_AGAINST was computed from 'FTA' and FTA_AGAINST
        # from 'FTM'.
        for stat_name in against_stats:
            season_games[stat_name + "_AGAINST"] = season_games.apply(
                lambda row, stat_name=stat_name: against(row, season_games, stat_name),
                axis=1)

        season_games["W_L"] = np.where(season_games['WL'] == 'W', 1, -1)

        season_games["G_PLAYED"] = season_games.groupby(by=["TEAM_ID"]).cumcount()

        # Expanding sums per team, shifted so each row only sees earlier games.
        season_games_sum = season_games.groupby(by=["TEAM_ID"])[sum_columns] \
            .expanding().sum().groupby(level=0).shift(1).reset_index(level=0)

        season_games = pd.merge(season_games, season_games_sum, suffixes=['', '_CUM'],
                                on=['GAME_ID', 'TEAM_ID'])

        # Rolling sums over up to 10 games per team, shifted the same way.
        season_games_l10_sum = season_games.groupby(by=["TEAM_ID"])[sum_columns] \
            .rolling(window=10, min_periods=0).sum().groupby(level=0).shift(1).reset_index(level=0)

        season_games = pd.merge(season_games, season_games_l10_sum, suffixes=['', '_CUM_L10'],
                                on=['GAME_ID', 'TEAM_ID'])

        season_frames.append(season_games)

    # Concatenate once instead of re-copying the accumulated frame per season.
    raw_season_games = pd.concat(season_frames)

    raw_season_games["SEASON"] = raw_season_games.SEASON_ID.str[-4:].astype(int)
    # regex=False: 'vs.' is a literal marker, not a pattern in which '.'
    # matches any character.
    raw_season_games['LOCATION'] = np.where(
        raw_season_games.MATCHUP.str.contains('vs.', regex=False), 'HOME', 'AWAY')

    raw_season_games["UNIQUE_MATCHUP"] = raw_season_games.apply(lambda row: matchup_field_by_id(row, raw_season_games),
                                                                axis=1)

    matchup_season_games_mean = raw_season_games.groupby(by=["TEAM_ID", "UNIQUE_MATCHUP"])[mean_columns] \
        .rolling(window=10, min_periods=0).mean().groupby(level=0).shift(1).reset_index(level=0).reset_index(level=0)
    raw_season_games = pd.merge(raw_season_games, matchup_season_games_mean, suffixes=['', '_MEAN_ML10'],
                                on=['GAME_ID', 'TEAM_ID', 'UNIQUE_MATCHUP'])

    matchup_season_games_w_l_cum = raw_season_games.groupby(by=["TEAM_ID", "UNIQUE_MATCHUP"])[sum_columns] \
        .rolling(window=10, min_periods=0).sum().groupby(level=0).shift(1).reset_index(level=0).reset_index(level=0)
    raw_season_games = pd.merge(raw_season_games, matchup_season_games_w_l_cum, suffixes=['', '_CUM_ML10'],
                                on=['GAME_ID', 'TEAM_ID', 'UNIQUE_MATCHUP'])

    # Transposing lets drop_duplicates remove columns with identical content.
    raw_season_games = raw_season_games.T.drop_duplicates().T
    raw_season_games.reset_index(inplace=True)
    raw_season_games.to_feather(config.RAW_SEASON_GAMES_DS)
    raw_season_games.to_csv(config.RAW_SEASON_GAMES_DS_CSV)
    print("Process done")
Ejemplo n.º 19
0
    with open(path, 'w') as outFile:
        outFile.write(",".join(headers) + "\n")
        for row in rows:
            rowStr = [str(r) for r in row]
            outFile.write(",".join(rowStr) + "\n")

# Basic team information such as location and name. The team list is written
# to CSV only when the file does not exist yet, then read back from disk.
teamDataPath = './data/teams.csv'
if not os.path.exists(teamDataPath):
    listOfDicToFile(teams.get_teams(), teamDataPath)
teamsInfo = csvToArrayDic(teamDataPath)

# One roster CSV per team, downloaded only when it is not on disk yet.
for team in teamsInfo:
    rosterPath = './data/' + team['abbreviation'] + '_roster.csv'
    print(rosterPath)
    if not os.path.exists(rosterPath):
        # Pause before each roster request - presumably to throttle calls
        # to the stats API.
        time.sleep(1)
        resp = commonteamroster.CommonTeamRoster(team['id'], proxy=False)
        rosterDict = resp.common_team_roster.get_dict()
        headers = rosterDict['headers']
        data = rosterDict['data']
        headersAndRowsToCSV(headers, data, rosterPath)
    # NOTE(review): rosterInfo is overwritten on every iteration and not
    # used in the visible code - confirm whether later code relies on it.
    rosterInfo = csvToArrayDic(rosterPath)

# The league game log is downloaded and written on every run; unlike the
# files above there is no existence check.
gameLogPath = './data/gameLogs.csv'
gameLog = leaguegamelog.LeagueGameLog(proxy=False)
gameLogDict = gameLog.league_game_log.get_dict()
headers = gameLogDict['headers']
data = gameLogDict['data']
headersAndRowsToCSV(headers, data, gameLogPath)
Ejemplo n.º 20
0
def create_gamelogCum_df(year, column_cum):
    """Build a table with one column per team of a cumulative season value.

    The team game log of ``year`` is downloaded, and running wins, losses,
    win-loss record strings and point differential are computed per team.
    The requested quantity is returned with one column per team and one
    row per game played by that team, in the order of the log.

    Args:
        year: Season identifier forwarded to ``LeagueGameLog``.
        column_cum: One of ``"Wins"``, ``"Record"`` or
            ``"Points Differential"`` (case-sensitive).

    Returns:
        A ``DataFrame`` whose columns are the team abbreviations in order
        of first appearance in the log. Teams that have played fewer games
        than others are padded with NaN at the end. For any other
        ``column_cum`` a message is printed and an empty frame with the
        team columns is returned.
    """
    log_raw = leaguegamelog.LeagueGameLog(
        player_or_team_abbreviation='T',
        season=year,
        sorter='DATE',
    )

    time.sleep(1)
    results = json.loads(log_raw.get_json())['resultSets'][0]

    # Headers are passed at construction so an empty rowSet does not
    # trigger a length-mismatch error.
    log = pd.DataFrame(results['rowSet'], columns=results['headers'])

    log['WIN'] = (log['WL'] == 'W') * 1
    log['LOSS'] = (log['WL'] == 'L') * 1

    log['Cumulative Wins'] = log.groupby('TEAM_ABBREVIATION').WIN.cumsum()
    log['Cumulative Losses'] = log.groupby('TEAM_ABBREVIATION').LOSS.cumsum()
    log['Record'] = log['Cumulative Wins'].astype(
        str) + '-' + log['Cumulative Losses'].astype(str)

    log['Cumulative Point Differential'] = log.groupby(
        'TEAM_ABBREVIATION').PLUS_MINUS.cumsum()

    teams = log['TEAM_ABBREVIATION'].unique().tolist()

    # Accepted ``column_cum`` values and the log column each one selects.
    source_columns = {
        'Wins': 'Cumulative Wins',
        'Record': 'Record',
        'Points Differential': 'Cumulative Point Differential',
    }

    if column_cum not in source_columns:
        print(
            'Improper column input. Input either "Wins", "Record", or "Points Differential" (case-sensitive).'
        )
        return pd.DataFrame(columns=teams)

    source = source_columns[column_cum]

    # Building the frame from per-team Series with a fresh 0-based index
    # pads shorter teams with NaN. Assigning plain lists column by column
    # raised ValueError whenever teams had played different numbers of
    # games, e.g. in the middle of a season.
    per_team = {
        team: log.loc[log['TEAM_ABBREVIATION'] == team,
                      source].reset_index(drop=True)
        for team in teams
    }

    return pd.DataFrame(per_team, columns=teams)