def test_invalid_url_yields_empty_class(self):
    """Every attribute except ``_uri`` is None when no page is retrieved."""
    stubbed_class = flexmock(Boxscore)
    stubbed_class.should_receive('_retrieve_html_page').and_return(None)

    instance = Boxscore(BOXSCORE)

    # ``_uri`` is the only attribute exempted from the None check.
    for attr_name, attr_value in vars(instance).items():
        if attr_name != '_uri':
            assert attr_value is None
def test_game_summary_with_no_scores_returns_none(self):
    """A line-score table with empty cells parses to a single None per team."""
    empty_line_score = """<table id="line_score"> <tbody> <tr> <td class="center"></td> <td class="center"></td> </tr> <tr> <td class="center"></td> <td class="center"></td> </tr> </tbody> </table>"""
    expected = {'away': [None], 'home': [None]}

    summary = Boxscore(None)._parse_summary(pq(empty_line_score))

    assert summary == expected
def predict(boxscores, year):
    """Print a predicted and the actual combined score for each boxscore.

    For every entry in ``boxscores`` a ``Boxscore`` is built, each side's
    games played (wins + losses) is computed, and the prediction is half of
    the rounded sum of ``total_game_points_scored_over_previous_10`` for the
    away and home teams.

    :param boxscores: iterable of values accepted by ``Boxscore(...)``
    :param year: passed through to ``total_game_points_scored_over_previous_10``
    """
    for game_key in boxscores:
        game = Boxscore(game_key)
        home_games = game.home_wins + game.home_losses
        away_games = game.away_wins + game.away_losses

        # The winner/loser abbreviations are mapped back to home/away by
        # comparing the two point totals.
        home_won = game.home_points > game.away_points
        if home_won:
            away_abbrev, home_abbrev = game.losing_abbr, game.winning_abbr
        else:
            away_abbrev, home_abbrev = game.winning_abbr, game.losing_abbr

        away_total = total_game_points_scored_over_previous_10(
            away_abbrev, away_games, year)
        home_total = total_game_points_scored_over_previous_10(
            home_abbrev, home_games, year)
        estimate = round(away_total + home_total, 2)
        final_estimate = str(estimate / 2)

        matchup = away_abbrev + " @ " + home_abbrev
        actual = str((game.home_points + game.away_points))
        print(matchup)
        print("Predicted Score: " + final_estimate)
        print("Actual Score: " + actual)
        print()
def box_scraper(seasons, schedule_df):
    """Download boxscores for already-played games and write one CSV per season.

    For each season, the rows of ``schedule_df`` whose ``Season`` column
    matches are walked; games dated today or later are skipped (printing
    ``"passing over"``), and for the rest ``Boxscore(row['BoxscoreIndex'])``
    is fetched. The row's ``rollingGames`` value is stored in a
    ``home_rolling`` column when the team's tri-code equals the last three
    characters of the boxscore index, otherwise in ``away_rolling``.

    :param seasons: iterable of season identifiers found in ``schedule_df.Season``
    :param schedule_df: DataFrame with at least the columns ``Season``,
        ``date``, ``BoxscoreIndex``, ``TeamName`` and ``rollingGames``
    :return: None; results are written to ``output/<season>_boxscores.csv``
    """
    tcd = tri_code_dict.create_team_conversion()
    for season in seasons:
        # Copy the slice so adding/overwriting 'date' never writes through to
        # (or warns about) the caller's schedule_df.
        season_df = schedule_df.loc[schedule_df.Season == season].copy()
        season_df['date'] = pd.to_datetime(season_df['date'])
        today = pd.to_datetime(datetime.today())
        today_ymd = (today.year, today.month, today.day)

        frames = []
        for index, row in tqdm(season_df.iterrows()):
            game_date = row['date']
            # Lexicographic (year, month, day) comparison: skip games dated
            # today or in the future, exactly as the nested checks did.
            if (game_date.year, game_date.month, game_date.day) >= today_ymd:
                print("passing over")
                continue

            box_link = row['BoxscoreIndex']
            _df = Boxscore(box_link).dataframe
            if tcd[row['TeamName']] == box_link[-3:]:
                _df['home_rolling'] = row['rollingGames']
            else:
                _df['away_rolling'] = row['rollingGames']

            if frames:
                print(season)
            frames.append(_df)

        if not frames:
            # Nothing has been played yet for this season; there is no frame
            # to write, so skip instead of failing on an empty result.
            print("no boxscores for season {}".format(season))
            continue

        # Concatenate once instead of re-copying the accumulated frame per game.
        box_df = pd.concat(frames, axis=0, sort=False)
        box_df.to_csv('output/{}_boxscores.csv'.format(season), index=None)
def _stack_player_frames(roster):
    """Concatenate the ``dataframe`` of every player in ``roster``.

    The last player is placed first and the remaining players follow in
    roster order, which is the row order the previous pop-based code
    produced. The roster list itself is not modified.
    """
    players = list(roster)
    ordered = players[-1:] + players[:-1]
    return pd.concat([player.dataframe for player in ordered])


def save_player_data():
    """Save per-player stats for every game id listed in ``game_ids.txt``.

    Each non-blank line of ``game_ids.txt`` is used as a ``Boxscore`` id. The
    away and home player dataframes are stacked, reduced to the
    ``PLAYER_STATS`` columns, and handed to ``save_game_to_file``.

    :raises ValueError: from ``pd.concat`` when a roster is empty
    """
    # Read all ids up front so the file handle is closed before any scraping.
    with open('game_ids.txt') as id_file:
        game_ids = [line.rstrip('\n') for line in id_file if line.strip()]

    for game_id in game_ids:
        boxscore = Boxscore(game_id)

        away_data = _stack_player_frames(boxscore.away_players)
        home_data = _stack_player_frames(boxscore.home_players)

        home_data = home_data[PLAYER_STATS]
        away_data = away_data[PLAYER_STATS]

        save_game_to_file(game_id, away_data, home_data)
def save_game_data():
    """Collect team stats and final scores for every game and dump them as JSON.

    Each non-blank line of ``game_ids.txt`` is used as a ``Boxscore`` id. For
    every game the ``input_data.BOXSCORE_STATS`` columns of the boxscore
    dataframe are flattened to ``{column: first value}`` and stored together
    with ``[away_points, home_points]``. The resulting list is written to
    ``game_data.json``.
    """
    with open('game_ids.txt') as id_file:
        game_ids = [line.rstrip('\n') for line in id_file if line.strip()]

    games = []
    # Progress counter is local to the loop (1-based) so it is always defined.
    for counter, game_id in enumerate(game_ids, start=1):
        print(counter)
        boxscore = Boxscore(game_id)

        # get the games data and select the needed stats from it
        stats = boxscore.dataframe
        stats = stats[input_data.BOXSCORE_STATS]
        # to_dict('list') yields one-element lists; keep just the value.
        stats = {k: v[0] for k, v in stats.to_dict('list').items()}

        games.append({
            'stats': stats,
            'result': [boxscore.away_points, boxscore.home_points]
        })

    with open("game_data.json", 'w') as outfile:
        json.dump(games, outfile)
def update_box(schedule_df):
    """Fetch the boxscore for every schedule row and return them stacked.

    For each row, ``Boxscore(row['BoxscoreIndex']).dataframe`` is fetched and
    the row's ``rollingGames`` value is stored in ``home_rolling`` when the
    team's tri-code equals the last three characters of the boxscore index,
    otherwise in ``away_rolling``. A game that fails is reported and skipped
    so one bad row does not abort the whole update.

    :param schedule_df: DataFrame with at least the columns ``BoxscoreIndex``,
        ``TeamName`` and ``rollingGames``
    :return: concatenated DataFrame of all fetched boxscores, or ``None`` when
        no boxscore could be fetched
    """
    tcd = tri_code_dict.create_team_conversion()
    frames = []
    for index, row in tqdm(schedule_df.iterrows()):
        print(row)
        box_link = row['BoxscoreIndex']
        try:
            _df = Boxscore(box_link).dataframe
            if tcd[row['TeamName']] == box_link[-3:]:
                _df['home_rolling'] = row['rollingGames']
            else:
                _df['away_rolling'] = row['rollingGames']
        except Exception as exc:
            # Best-effort per game, but never hide the reason and never trap
            # KeyboardInterrupt/SystemExit the way a bare except would.
            print("skipping {}: {!r}".format(box_link, exc))
            continue
        frames.append(_df)

    if not frames:
        return None
    return pd.concat(frames, axis=0)
def boxscore(self):
    """
    Build and return a Boxscore instance for this game, constructed from
    the value stored in ``self._boxscore``, giving access to more detailed
    stats on the game.
    """
    boxscore_key = self._boxscore
    return Boxscore(boxscore_key)
def setup_method(self, *args, **kwargs):
    """Prepare the expected-results fixture and the Boxscore under test.

    Stores the expected attribute values in ``self.results``, stubs
    ``utils._todays_date`` to return ``MockDateTime(YEAR, MONTH)``, and
    builds ``self.boxscore`` from the ``BOXSCORE`` constant.
    """
    # Expected values keyed by attribute name: game metadata first, then the
    # away-team stats, then the home-team stats.
    self.results = {
        'date': '10:30 PM, October 31, 2017',
        'location': 'STAPLES Center, Los Angeles, California',
        'winner': HOME,
        'winning_name': 'Los Angeles Lakers',
        'winning_abbr': 'LAL',
        'losing_name': 'Detroit Pistons',
        'losing_abbr': 'DET',
        'pace': 97.4,
        'away_wins': 5,
        'away_losses': 3,
        'away_minutes_played': 240,
        'away_field_goals': 41,
        'away_field_goal_attempts': 94,
        'away_field_goal_percentage': .436,
        'away_two_point_field_goals': 31,
        'away_two_point_field_goal_attempts': 61,
        'away_two_point_field_goal_percentage': .508,
        'away_three_point_field_goals': 10,
        'away_three_point_field_goal_attempts': 33,
        'away_three_point_field_goal_percentage': .303,
        'away_free_throws': 1,
        'away_free_throw_attempts': 3,
        'away_free_throw_percentage': .333,
        'away_offensive_rebounds': 10,
        'away_defensive_rebounds': 34,
        'away_total_rebounds': 44,
        'away_assists': 21,
        'away_steals': 7,
        'away_blocks': 3,
        'away_turnovers': 12,
        'away_personal_fouls': 11,
        'away_points': 93,
        'away_true_shooting_percentage': .488,
        'away_effective_field_goal_percentage': .489,
        'away_three_point_attempt_rate': .351,
        'away_free_throw_attempt_rate': .032,
        'away_offensive_rebound_percentage': 19.2,
        'away_defensive_rebound_percentage': 75.6,
        'away_total_rebound_percentage': 45.4,
        'away_assist_percentage': 51.2,
        'away_steal_percentage': 7.2,
        'away_block_percentage': 4.6,
        'away_turnover_percentage': 11.2,
        'away_offensive_rating': 95.5,
        'away_defensive_rating': 116.0,
        'home_wins': 3,
        'home_losses': 4,
        'home_minutes_played': 240,
        'home_field_goals': 45,
        'home_field_goal_attempts': 91,
        'home_field_goal_percentage': .495,
        'home_two_point_field_goals': 33,
        'home_two_point_field_goal_attempts': 65,
        'home_two_point_field_goal_percentage': .508,
        'home_three_point_field_goals': 12,
        'home_three_point_field_goal_attempts': 26,
        'home_three_point_field_goal_percentage': .462,
        'home_free_throws': 11,
        'home_free_throw_attempts': 14,
        'home_free_throw_percentage': .786,
        'home_offensive_rebounds': 11,
        'home_defensive_rebounds': 42,
        'home_total_rebounds': 53,
        'home_assists': 30,
        'home_steals': 9,
        'home_blocks': 5,
        'home_turnovers': 14,
        'home_personal_fouls': 14,
        'home_points': 113,
        'home_true_shooting_percentage': .582,
        'home_effective_field_goal_percentage': .560,
        'home_three_point_attempt_rate': .286,
        'home_free_throw_attempt_rate': .154,
        'home_offensive_rebound_percentage': 24.4,
        'home_defensive_rebound_percentage': 80.8,
        'home_total_rebound_percentage': 54.6,
        'home_assist_percentage': 66.7,
        'home_steal_percentage': 9.2,
        'home_block_percentage': 8.2,
        'home_turnover_percentage': 12.6,
        'home_offensive_rating': 116.0,
        'home_defensive_rating': 95.5
    }
    # Pin "today" so the Boxscore is built against a fixed year and month.
    flexmock(utils) \
        .should_receive('_todays_date') \
        .and_return(MockDateTime(YEAR, MONTH))

    self.boxscore = Boxscore(BOXSCORE)
from sportsreference.nba.boxscore import Boxscore
import datetime
import pandas as pd

if __name__ == "__main__":
    # For each season, read one boxscore id per line from '<year>_games.txt',
    # fetch every game's dataframe and pickle the stacked result to
    # '<year>.pkl'.
    years = ['2019']
    for year in years:
        file_path = year + '_games.txt'
        season_dfs = []
        # The context manager closes the file even if a fetch fails midway.
        with open(file_path, 'r') as games_file:
            for line in games_file:
                game_id = line.strip()
                if not game_id:
                    # A blank line is not a game id; don't request Boxscore('').
                    continue
                boxscore = Boxscore(game_id)
                season_dfs.append(boxscore.dataframe)
        if season_dfs:
            pd.concat(season_dfs).to_pickle(year + '.pkl')
        else:
            # pd.concat raises on an empty list; report instead of crashing.
            print('no games listed in ' + file_path)
class TestNBABoxscore:
    """Unit tests for derived ``Boxscore`` properties and parsing helpers.

    ``setup_method`` stubs ``Boxscore._parse_game_data`` so no page is parsed,
    and most tests then install ``PropertyMock`` objects on
    ``type(self.boxscore)`` (the class, not the instance) to control the
    private fields a property reads.

    NOTE(review): nothing in this class restores those class-level
    attributes afterwards — confirm the tests do not depend on execution
    order.
    """

    @patch('requests.get', side_effect=mock_pyquery)
    def setup_method(self, *args, **kwargs):
        """Create ``self.boxscore`` with ``_parse_game_data`` stubbed to None."""
        flexmock(Boxscore) \
            .should_receive('_parse_game_data') \
            .and_return(None)

        self.boxscore = Boxscore(None)

    def test_away_team_wins(self):
        """``winner`` is AWAY when away points (75) exceed home points (70)."""
        fake_away_points = PropertyMock(return_value=75)
        fake_home_points = PropertyMock(return_value=70)
        type(self.boxscore)._away_points = fake_away_points
        type(self.boxscore)._home_points = fake_home_points

        assert self.boxscore.winner == AWAY

    def test_home_team_wins(self):
        """``winner`` is HOME when home points (75) exceed away points (70)."""
        fake_away_points = PropertyMock(return_value=70)
        fake_home_points = PropertyMock(return_value=75)
        type(self.boxscore)._away_points = fake_away_points
        type(self.boxscore)._home_points = fake_home_points

        assert self.boxscore.winner == HOME

    def test_winning_name_is_home(self):
        """``winning_name`` comes from ``_home_name`` when the winner is HOME."""
        expected_name = 'Home Name'

        fake_winner = PropertyMock(return_value=HOME)
        fake_home_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_name = fake_home_name

        assert self.boxscore.winning_name == expected_name

    def test_winning_name_is_away(self):
        """``winning_name`` comes from ``_away_name`` when the winner is AWAY."""
        expected_name = 'Away Name'

        fake_winner = PropertyMock(return_value=AWAY)
        fake_away_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_name = fake_away_name

        assert self.boxscore.winning_name == expected_name

    def test_winning_abbr_is_home(self):
        """``winning_abbr`` is the home abbreviation when the winner is HOME."""
        expected_name = 'HOME'

        # The abbreviation parser is stubbed, so only the home/away selection
        # is exercised here.
        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=HOME)
        fake_home_abbr = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_abbr = fake_home_abbr

        assert self.boxscore.winning_abbr == expected_name

    def test_winning_abbr_is_away(self):
        """``winning_abbr`` is the away abbreviation when the winner is AWAY."""
        expected_name = 'AWAY'

        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=AWAY)
        fake_away_abbr = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_abbr = fake_away_abbr

        assert self.boxscore.winning_abbr == expected_name

    def test_losing_name_is_home(self):
        """``losing_name`` comes from ``_home_name`` when the winner is AWAY."""
        expected_name = 'Home Name'

        fake_winner = PropertyMock(return_value=AWAY)
        fake_home_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_name = fake_home_name

        assert self.boxscore.losing_name == expected_name

    def test_losing_name_is_away(self):
        """``losing_name`` comes from ``_away_name`` when the winner is HOME."""
        expected_name = 'Away Name'

        fake_winner = PropertyMock(return_value=HOME)
        fake_away_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_name = fake_away_name

        assert self.boxscore.losing_name == expected_name

    def test_losing_abbr_is_home(self):
        """``losing_abbr`` is the home abbreviation when the winner is AWAY."""
        expected_name = 'HOME'

        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=AWAY)
        fake_home_abbr = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_abbr = fake_home_abbr

        assert self.boxscore.losing_abbr == expected_name

    def test_losing_abbr_is_away(self):
        """``losing_abbr`` is the away abbreviation when the winner is HOME."""
        expected_name = 'AWAY'

        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=HOME)
        fake_away_abbr = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_abbr = fake_away_abbr

        assert self.boxscore.losing_abbr == expected_name

    def test_invalid_away_record_returns_default_wins(self):
        """``away_wins`` is 0 for the record text 'Golden State Warriors 1'."""
        fake_record = PropertyMock(return_value='Golden State Warriors 1')
        type(self.boxscore)._away_record = fake_record

        assert self.boxscore.away_wins == 0

    def test_invalid_away_record_returns_default_losses(self):
        """``away_losses`` is 0 for the record text 'Golden State Warriors 1'."""
        fake_record = PropertyMock(return_value='Golden State Warriors 1')
        type(self.boxscore)._away_record = fake_record

        assert self.boxscore.away_losses == 0

    def test_invalid_home_record_returns_default_wins(self):
        """``home_wins`` is 0 for the record text 'Golden State Warriors 1'."""
        fake_record = PropertyMock(return_value='Golden State Warriors 1')
        type(self.boxscore)._home_record = fake_record

        assert self.boxscore.home_wins == 0

    def test_invalid_home_record_returns_default_losses(self):
        """``home_losses`` is 0 for the record text 'Golden State Warriors 1'."""
        fake_record = PropertyMock(return_value='Golden State Warriors 1')
        type(self.boxscore)._home_record = fake_record

        assert self.boxscore.home_losses == 0

    def test_game_summary_with_no_scores_returns_none(self):
        """A line-score table with empty cells yields ``[None]`` per team."""
        result = Boxscore(None)._parse_summary(pq(
            """<table id="line_score"> <tbody> <tr> <td class="center"></td> <td class="center"></td> </tr> <tr> <td class="center"></td> <td class="center"></td> </tr> </tbody> </table>"""
        ))

        assert result == {
            'away': [None],
            'home': [None]
        }

    def test_invalid_url_returns_none(self):
        """``_retrieve_html_page('')`` returns None."""
        result = Boxscore(None)._retrieve_html_page('')

        assert result is None

    def test_no_class_information_returns_dataframe_of_none(self):
        """``dataframe`` is None when both teams' points are None."""
        mock_points = PropertyMock(return_value=None)
        type(self.boxscore)._away_points = mock_points
        type(self.boxscore)._home_points = mock_points

        assert self.boxscore.dataframe is None

    def test_nba_game_info(self):
        """Date and location are both extracted from the game-info text."""
        fields = {
            'date': '7:30 PM, November 9, 2018',
            'location': 'State Farm Arena, Atlanta, Georgia'
        }

        mock_field = """7:30 PM, November 9, 2018 State Farm Arena, Atlanta, Georgia Logos via Sports Logos.net / About logos """

        m = MockBoxscoreData(MockField(mock_field))

        # Each expected field is parsed independently from the same mock data.
        for field, value in fields.items():
            result = self.boxscore._parse_game_date_and_location(field, m)
            assert value == result

    def test_nba_partial_game_info(self):
        """Location is None when the game-info text has no venue entry."""
        fields = {
            'date': '7:30 PM, November 9, 2018',
            'location': None
        }

        mock_field = """7:30 PM, November 9, 2018 Logos via Sports Logos.net / About logos"""

        m = MockBoxscoreData(MockField(mock_field))

        for field, value in fields.items():
            result = self.boxscore._parse_game_date_and_location(field, m)
            assert value == result
def setup_method(self, *args, **kwargs):
    """Stub ``Boxscore._parse_game_data`` to return None, then create the
    ``Boxscore(None)`` instance shared by the tests as ``self.boxscore``."""
    stubbed_class = flexmock(Boxscore)
    stubbed_class.should_receive('_parse_game_data').and_return(None)

    self.boxscore = Boxscore(None)
def test_invalid_url_returns_none(self):
    """Retrieving the page for an empty URL string yields None."""
    instance = Boxscore(None)
    page = instance._retrieve_html_page('')

    assert page is None
from sportsreference.nba.boxscore import Boxscore
from sportsreference.nba.schedule import Schedule
from sportsreference.nba.teams import Teams
import pandas as pd

# Gather the boxscore index of every game on every team's schedule.
teams = Teams()
indexes = []
for team in teams:
    games = team.schedule
    for game in games:
        indexes.append(game.boxscore_index)

# Fetch each boxscore and stack the frames with a single concat:
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every call.
frames = []
for index in indexes:
    score = Boxscore(index)
    df = score.dataframe
    if df is not None:
        frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
scores = pd.concat(frames) if frames else pd.DataFrame()
from datetime import datetime
import sportsreference
from sportsreference.nba.boxscore import Boxscore, Boxscores

# Date-based queries (and the ``games`` dictionary) belong to ``Boxscores``;
# ``Boxscore`` represents a single game identified by its boxscore index.
games_today = Boxscores(datetime.today())
print(games_today.games)  # Prints a dictionary of all matchups for today

# Pulls all games between and including January 1, 2018 and January 5, 2018
games = Boxscores(datetime(2018, 1, 1), datetime(2018, 1, 5))
# Prints a dictionary of all results from January 1, 2018 and January 5,
# 2018
print(games.games)

# relevant stats
# The stats are instance properties, so fetch the game once and read them
# from that instance instead of calling them on the class.
game_boxscore = Boxscore("201710310LAL")

awayDefensiveRating = game_boxscore.away_defensive_rating
awayEFGP = game_boxscore.away_effective_field_goal_percentage
awayFGP = game_boxscore.away_field_goal_percentage
awayOffensiveRating = game_boxscore.away_offensive_rating
awayORebP = game_boxscore.away_offensive_rebound_percentage
awayPoints = game_boxscore.away_points
awayTrueShootP = game_boxscore.away_true_shooting_percentage
awayTOPercent = game_boxscore.away_turnover_percentage

homeDefensiveRating = game_boxscore.home_defensive_rating
homeEFGP = game_boxscore.home_effective_field_goal_percentage
homeFGP = game_boxscore.home_field_goal_percentage
homeOffensiveRating = game_boxscore.home_offensive_rating
homeORebP = game_boxscore.home_offensive_rebound_percentage
homePoints = game_boxscore.home_points
homeTrueShootP = game_boxscore.home_true_shooting_percentage
def plot_player_game(players, season, stat, start_date=datetime.date(1900, 1, 1), end_date=datetime.date(3000, 1, 1), only_month=False, xlabel="Time", ylabel=None, scatter=True, return_type="img", cum=False):
    """
    Uses Sportsreference

    Plots the graphs of players according to their performance in
    particular games.

    :param players: Basketball-reference id of a player or list of players
    :type players: String or list of strings
    :param season: The season in which the games are played
    :type season: Either in dashed form (2018-19) or single form (2019 means
        the season 2018-19)
    :param stat: The statistical attribute of the player to plot
    :type stat: String
    :param start_date: The date from which the data is plotted
    :type start_date: datetime.date format
    :param end_date: The date until which data is plotted
    :type end_date: datetime.date format
    :param only_month: Whether or not the ticks on the x-axis only contain
        months. (Recommended when plotting dates extending across more than
        a couple of months)
    :type only_month: Bool
    :param xlabel: The label on the x-axis on the returned plot
    :type xlabel: String
    :param ylabel: The label on the y-axis on the returned plot; defaults to
        ``stat`` when None
    :type ylabel: String
    :param scatter: Whether or not to include a dot for each data point in
        the graph
    :type scatter: Bool
    :param return_type: Various methods by which the graph can be returned
    :type return_type: "img": png image, "fig": Matplotlib figure and axes,
        "show": calls the matplotlib show function (appropriate for jupyter
        notebooks), "html": base64 image useful for rendering in html pages
    :param cum: Whether results are cumulative or not
    :type cum: Bool
    :return: whatever ``return_plot`` produces for ``return_type``
    """
    if not isinstance(players, list):
        players = [players]
    player_obj = get_player_obj(players)
    fig, ax = plt.subplots()
    for player in player_obj:
        season = date_format(season)
        team = player(season).team_abbreviation
        sch = Schedule(team, date_format(season, format_as="single"))
        sch_df = sch.dataframe
        x = []
        y = []
        for index, row in sch_df.iterrows():
            game_date = row['datetime'].date()
            if not start_date <= game_date <= end_date:
                continue
            box = Boxscore(index)
            # Pick the roster for the side this player's team was on; rows
            # with any other location contribute no data points.
            if row['location'] == "Home":
                roster = box.home_players
            elif row['location'] == "Away":
                roster = box.away_players
            else:
                continue
            for boxplay in roster:
                if boxplay.player_id != player.player_id:
                    continue
                x.append(game_date)
                value = boxplay.dataframe[stat]
                if cum:
                    # Running total: add the previous point, or 0 for the
                    # first game in the window.
                    value = value + (y[-1] if y else 0)
                y.append(value)
        ax.plot(x, y, label=player.name)
        if scatter:
            ax.scatter(x, y)
        ax.legend()
    if only_month:
        ax.xaxis.set_major_locator(MonthLocator())
        ax.xaxis.set_major_formatter(DateFormatter("%y-%m"))
    fig.autofmt_xdate()
    ax.set_xlabel(xlabel)
    if ylabel is None:
        ylabel = stat
    ax.set_ylabel(ylabel)
    return return_plot(stat, fig, ax, return_type)
def plot_team_game(teams, stat, season, start_date, end_date, opp=False, xlabel="Time", ylabel=None, only_month=False, scatter=True, return_type="img", cum=False):
    """
    Uses Sportsreference

    Plots a per-game team stat (or the opponent's stat) over time.

    :param teams: Basketball-reference id for team
    :type teams: String or list of strings
    :param stat: The statistical attribute of the team to plot, without the
        ``home_``/``away_`` prefix
    :type stat: String
    :param season: The season in which the games are played
    :type season: Either in dashed form (2018-19) or single form (2019 means
        the season 2018-19)
    :param start_date: The date from which the data is plotted
    :type start_date: datetime.date format
    :param end_date: The date until which data is plotted
    :type end_date: datetime.date format
    :param opp: Whether the stat is for the opponent
    :type opp: Bool
    :param xlabel: The label on the x-axis on the returned plot
    :type xlabel: String
    :param ylabel: The label on the y-axis on the returned plot; defaults to
        ``stat`` (or ``"opp_" + stat`` when ``opp`` is set) when None
    :type ylabel: String
    :param only_month: Whether or not the ticks on the x-axis only contain
        months
    :type only_month: Bool
    :param scatter: Whether or not to include a dot for each data point in
        the graph
    :type scatter: Bool
    :param return_type: Various methods by which the graph can be returned
    :type return_type: "img": png image, "fig": Matplotlib figure and axes,
        "show": calls the matplotlib show function (appropriate for jupyter
        notebooks), "html": base64 image useful for rendering in html pages
    :param cum: Whether results are cumulative or not
    :type cum: Bool
    :return: whatever ``return_plot`` produces for ``return_type``
    """
    # Maps a side's column prefix to the opposing side's prefix.
    stat_prefix_reversal = {"home_": "away_", "away_": "home_"}

    fig, ax = plt.subplots()
    if not isinstance(teams, list):
        teams = [teams]
    for team in teams:
        x = []
        y = []
        sch = Schedule(team, season)
        for index, row in sch.dataframe.iterrows():
            game_date = row['datetime'].date()
            if not start_date <= game_date <= end_date:
                continue
            box = Boxscore(index)
            stat_prefix = ""
            if row['location'] == "Home":
                stat_prefix = "home_"
            elif row['location'] == "Away":
                stat_prefix = "away_"
            if opp:
                stat_prefix = stat_prefix_reversal[stat_prefix]
            x.append(game_date)
            # The boxscore frame holds one row per game; take that single
            # element explicitly rather than calling int() on the Series.
            value = int(box.dataframe[stat_prefix + stat].iloc[0])
            if cum:
                # Running total: add the previous point, or 0 for the first
                # game in the window.
                value += y[-1] if y else 0
            y.append(value)
        ax.plot(x, y, label=team)
        if scatter:
            ax.scatter(x, y)
        ax.legend()
    if only_month:
        ax.xaxis.set_major_locator(MonthLocator())
        ax.xaxis.set_major_formatter(DateFormatter("%y-%m"))
    fig.autofmt_xdate()
    ax.set_xlabel(xlabel)
    # Honour a caller-supplied ylabel; only fall back to the stat name when
    # none was given.
    if ylabel is None:
        ylabel = "opp_" + stat if opp else stat
    ax.set_ylabel(ylabel)
    return return_plot(stat, fig, ax, return_type)