def test_nfl_boxscore_string_representation(self):
    """Check that ``__repr__`` of the sample boxscore matches the expected text."""
    game = Boxscore(BOXSCORE)
    wanted = ('Boxscore for Houston Texans at Kansas City Chiefs '
              '(Thursday Sep 10, 2020)')

    assert game.__repr__() == wanted
def test_nfl_boxscore_players(self):
    """Check roster sizes and that every player carries a non-empty dataframe."""
    game = Boxscore(BOXSCORE)

    # Expected roster sizes for the sample game: 33 home and 28 away players.
    assert len(game.home_players) == 33
    assert len(game.away_players) == 28

    # Home players are checked first, then away players.
    for roster in (game.home_players, game.away_players):
        for member in roster:
            assert not member.dataframe.empty
def test_invalid_url_yields_empty_class(self):
    """With page retrieval stubbed to None, every attribute except ``_uri`` is None."""
    (flexmock(Boxscore)
        .should_receive('_retrieve_html_page')
        .and_return(None))

    empty_game = Boxscore(BOXSCORE)

    for name, attribute in vars(empty_game).items():
        # ``_uri`` is the one attribute excluded from the None check.
        if name != '_uri':
            assert attribute is None
def test_game_summary_with_no_scores_returns_none(self):
    """A linescore table with only empty cells parses to ``[None]`` for both teams."""
    # Markup is kept exactly as it appears in the original literal.
    empty_linescore = pq("""<table class="linescore nohover stats_table no_freeze"> <tbody> <tr> <td class="center"></td> <td class="center"></td> </tr> <tr> <td class="center"></td> <td class="center"></td> </tr> </tbody> </table>""")

    summary = Boxscore(None)._parse_summary(empty_linescore)

    assert summary == {'away': [None], 'home': [None]}
def game_data_from_year(year, first_week=1, last_week=21):
    """Load the data from a certain year.

    Parameters
    ----------
    year : int
        The year to get the data from
    first_week : int, optional
        First week number to load (inclusive). Defaults to 1.
    last_week : int, optional
        Last week number to load (inclusive). Defaults to 21.

    Returns
    -------
    DataFrame or int
        A DataFrame containing data for all the games in the year, with an
        added "week" column holding the week number of each game. If loading
        fails, an error is printed and -1 is returned instead.
    """
    season_frames = []

    try:
        # Retrieve data for each week in the year
        for week in range(first_week, last_week + 1):
            # Build the week's schedule once. The previous version rebuilt
            # Boxscores(week, year) for the loop bound, for every game and
            # again for the week column (presumably one fetch each time).
            games = Boxscores(week, year).games[str(week) + "-" + str(year)]

            # Box score stats for each game in the week
            game_frames = []
            for game in games:
                box_score = Boxscore(game['boxscore']).dataframe
                # A game without a dataframe contributes no row; skip it so
                # it cannot desynchronise the week column below.
                if box_score is not None:
                    game_frames.append(box_score)

            week_data = pd.concat(game_frames) if game_frames else pd.DataFrame()

            # A scalar broadcasts to however many rows were actually loaded,
            # so a skipped game no longer triggers a length mismatch.
            week_data["week"] = week

            season_frames.append(week_data)

        game_data = pd.concat(season_frames) if season_frames else pd.DataFrame()
    except Exception as err:
        # Still best-effort (callers get -1), but KeyboardInterrupt and
        # SystemExit now propagate and the cause is reported.
        print("ERROR: Data loading failed")
        print("Cause: {!r}".format(err))
        return -1

    return game_data
def setup_method(self, *args, **kwargs):
    """Prepare the expected values, pin today's date and load the sample boxscore."""
    # General game information.
    game_info = {
        'date': 'Thursday Sep 10, 2020',
        'time': '8:20pm',
        'datetime': datetime(2020, 9, 10, 20, 20),
        'stadium': 'Arrowhead Stadium',
        'attendance': 15895,
        'duration': '2:53',
        'winner': HOME,
        'winning_name': 'Kansas City Chiefs',
        'winning_abbr': 'KAN',
        'losing_name': 'Houston Texans',
        'losing_abbr': 'HTX',
        'won_toss': 'Chiefs (deferred)',
        'weather': '56 degrees, relative humidity 95%, wind 7 mph',
        'vegas_line': 'Kansas City Chiefs -9.5',
        'surface': 'Astroturf',
        'roof': 'Outdoors',
        'over_under': '53.5 (over)',
    }

    # Away team statistics.
    away_stats = {
        'away_points': 20,
        'away_first_downs': 21,
        'away_rush_attempts': 22,
        'away_rush_yards': 118,
        'away_rush_touchdowns': 2,
        'away_pass_completions': 20,
        'away_pass_attempts': 32,
        'away_pass_yards': 253,
        'away_pass_touchdowns': 1,
        'away_interceptions': 1,
        'away_times_sacked': 4,
        'away_yards_lost_from_sacks': 11,
        'away_net_pass_yards': 242,
        'away_total_yards': 360,
        'away_fumbles': 0,
        'away_fumbles_lost': 0,
        'away_turnovers': 1,
        'away_penalties': 5,
        'away_yards_from_penalties': 37,
        'away_third_down_conversions': 4,
        'away_third_down_attempts': 10,
        'away_fourth_down_conversions': 1,
        'away_fourth_down_attempts': 1,
        'away_time_of_possession': '25:13',
    }

    # Home team statistics.
    home_stats = {
        'home_points': 34,
        'home_first_downs': 28,
        'home_rush_attempts': 34,
        'home_rush_yards': 166,
        'home_rush_touchdowns': 1,
        'home_pass_completions': 24,
        'home_pass_attempts': 32,
        'home_pass_yards': 211,
        'home_pass_touchdowns': 3,
        'home_interceptions': 0,
        'home_times_sacked': 1,
        'home_yards_lost_from_sacks': 8,
        'home_net_pass_yards': 203,
        'home_total_yards': 369,
        'home_fumbles': 0,
        'home_fumbles_lost': 0,
        'home_turnovers': 0,
        'home_penalties': 1,
        'home_yards_from_penalties': 5,
        'home_third_down_conversions': 7,
        'home_third_down_attempts': 13,
        'home_fourth_down_conversions': 1,
        'home_fourth_down_attempts': 1,
        'home_time_of_possession': '34:47',
    }

    # Merge in the same key order as a single literal: info, away, home.
    self.results = dict(game_info)
    self.results.update(away_stats)
    self.results.update(home_stats)

    (flexmock(utils)
        .should_receive('_todays_date')
        .and_return(MockDateTime(YEAR, MONTH)))

    self.boxscore = Boxscore(BOXSCORE)
def retrieve_schedule(season, team, first_game, cred='credentials.json'):
    """Upload a team's played home games for one season to the NFL database.

    For every entry in the team's schedule a boxscore id is built and fetched;
    games with boxscore data are appended to the ``dim_nfl_games`` table of the
    ``nfl`` MySQL database on localhost.

    :param season: int year the season started in. Games dated January or
        February are assigned to ``season + 1`` when building the boxscore id.
    :param team: team abbreviation; selects the schedule and is lower-cased
        into the boxscore id.
    :param first_game: ``YYYY/MM/DD`` date of the first game of the season,
        used to derive the week number.
    :param cred: path to a JSON file with ``userId`` and ``password`` keys.
    :return: None. Uploads home/away teams, final score, week, season and date.
    """
    from urllib.parse import quote

    # The context manager closes the credentials file even if parsing fails.
    with open(cred) as cred_file:
        login = json.load(cred_file)

    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot corrupt the connection URL.
    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            quote(str(login['userId']), safe=''),
            quote(str(login['password']), safe='')))

    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12',
    }

    try:
        # Parsed once up front: it is constant for the whole season and a
        # malformed date now fails before any game is processed.
        season_start = dt.datetime.strptime(first_game, '%Y/%m/%d')
        schedule = Team(team_name=team, year=season).schedule

        for game in schedule:
            # str(game) is expected to start with "<Month name> <day>".
            game_split = str(game).split(' ')
            month_name = game_split[0]

            # January/February games spill over into the next calendar year.
            if month_name in ('January', 'February'):
                game_year = season + 1
            else:
                game_year = season

            game_id = (str(game_year) + months_dict[month_name]
                       + game_split[1].zfill(2) + '0' + team.lower())
            boxscore = Boxscore(uri=game_id)

            # Boxscore urls are only generated for home games. The original
            # check compared against the string 'None' (presumably how the
            # library renders a missing team - confirm); a real None is
            # treated the same way.
            if boxscore.home_abbreviation in (None, 'None'):
                print(
                    'Game data does not exist. Game is either away game for {} or is yet to be played.'
                    .format(team))
                continue

            home_abbr = str(boxscore.home_abbreviation)
            away_abbr = str(boxscore.away_abbreviation)
            week_no = ((boxscore.datetime - season_start).days // 7) + 1
            game_date = boxscore.datetime.strftime('%Y-%m-%d')

            game_data = {
                'game_id': game_id,
                'home': home_abbr.upper(),
                'away': away_abbr.upper(),
                'home_score': boxscore.home_points,
                'away_score': boxscore.away_points,
                'season': season,
                'week_no': week_no,
                'game_date': game_date,
            }

            # A unique index on the table rejects duplicates; that surfaces
            # as IntegrityError and is reported instead of raised.
            try:
                pd.DataFrame(game_data, index=[0]).to_sql(
                    'dim_nfl_games', con=engine, index=False,
                    if_exists='append')
            except sql.exc.IntegrityError:
                print('Game: {} vs. {} on {} already present in table.'.format(
                    home_abbr, away_abbr, game_date))
            else:
                print('Game inputted: {} vs. {} on {} (Week {})'.format(
                    home_abbr, away_abbr, game_date, week_no))

        print('Schedule inputted: {} for {}'.format(team, season))
    finally:
        # Release pooled connections whether or not the upload succeeded.
        engine.dispose()
def boxscore(self):
    """
    Builds a Boxscore instance from this object's ``_boxscore`` value, giving
    access to more detailed stats on the game.
    """
    detailed_game = Boxscore(self._boxscore)
    return detailed_game
def setup_method(self, *args, **kwargs):
    """Stub ``Boxscore._parse_game_data`` to return None, then build ``Boxscore(None)``."""
    (flexmock(Boxscore)
        .should_receive('_parse_game_data')
        .and_return(None))

    self.boxscore = Boxscore(None)
class TestNFLBoxscore:
    """Tests for ``Boxscore`` properties and parsing helpers on a stubbed instance.

    Most tests replace private attributes with ``PropertyMock`` objects and
    then assert on the public property derived from them.

    NOTE(review): the mocks are assigned on ``type(self.boxscore)``, i.e. on
    the class itself, and nothing visible here restores them afterwards -
    confirm that tests do not depend on each other's leftovers.
    """

    @patch('requests.get', side_effect=mock_pyquery)
    def setup_method(self, *args, **kwargs):
        """Stub ``_parse_game_data`` to return None and build ``Boxscore(None)``."""
        flexmock(Boxscore) \
            .should_receive('_parse_game_data') \
            .and_return(None)

        self.boxscore = Boxscore(None)

    def test_away_team_wins(self):
        """``winner`` is AWAY when away points (28) exceed home points (21)."""
        fake_away_points = PropertyMock(return_value=28)
        fake_home_points = PropertyMock(return_value=21)
        type(self.boxscore)._away_points = fake_away_points
        type(self.boxscore)._home_points = fake_home_points

        assert self.boxscore.winner == AWAY

    def test_home_team_wins(self):
        """``winner`` is HOME when home points (28) exceed away points (21)."""
        fake_away_points = PropertyMock(return_value=21)
        fake_home_points = PropertyMock(return_value=28)
        type(self.boxscore)._away_points = fake_away_points
        type(self.boxscore)._home_points = fake_home_points

        assert self.boxscore.winner == HOME

    def test_winning_name_is_home(self):
        """``winning_name`` is the home name when ``winner`` is HOME."""
        expected_name = 'Home Name'

        fake_winner = PropertyMock(return_value=HOME)
        fake_home_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_name = fake_home_name

        assert self.boxscore.winning_name == expected_name

    def test_winning_name_is_away(self):
        """``winning_name`` is the away name when ``winner`` is AWAY."""
        expected_name = 'Away Name'

        fake_winner = PropertyMock(return_value=AWAY)
        fake_away_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_name = fake_away_name

        assert self.boxscore.winning_name == expected_name

    def test_winning_abbr_is_home(self):
        """``winning_abbr`` is the stubbed abbreviation when ``winner`` is HOME."""
        expected_name = 'HOME'

        # utils._parse_abbreviation is stubbed, so only the branch taken by
        # winning_abbr is exercised here.
        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=HOME)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.winning_abbr == expected_name

    def test_winning_abbr_is_away(self):
        """``winning_abbr`` is the stubbed abbreviation when ``winner`` is AWAY."""
        expected_name = 'AWAY'

        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=AWAY)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.winning_abbr == expected_name

    def test_losing_name_is_home(self):
        """``losing_name`` is the home name when ``winner`` is AWAY."""
        expected_name = 'Home Name'

        fake_winner = PropertyMock(return_value=AWAY)
        fake_home_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._home_name = fake_home_name

        assert self.boxscore.losing_name == expected_name

    def test_losing_name_is_away(self):
        """``losing_name`` is the away name when ``winner`` is HOME."""
        expected_name = 'Away Name'

        fake_winner = PropertyMock(return_value=HOME)
        fake_away_name = PropertyMock(return_value=MockName(expected_name))
        type(self.boxscore).winner = fake_winner
        type(self.boxscore)._away_name = fake_away_name

        assert self.boxscore.losing_name == expected_name

    def test_losing_abbr_is_home(self):
        """``losing_abbr`` is the stubbed abbreviation when ``winner`` is AWAY."""
        expected_name = 'HOME'

        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=AWAY)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.losing_abbr == expected_name

    def test_losing_abbr_is_away(self):
        """``losing_abbr`` is the stubbed abbreviation when ``winner`` is HOME."""
        expected_name = 'AWAY'

        flexmock(utils) \
            .should_receive('_parse_abbreviation') \
            .and_return(expected_name)

        fake_winner = PropertyMock(return_value=HOME)
        type(self.boxscore).winner = fake_winner

        assert self.boxscore.losing_abbr == expected_name

    def test_game_summary_with_no_scores_returns_none(self):
        """A linescore table with only empty cells parses to ``[None]`` per team."""
        result = Boxscore(None)._parse_summary(
            pq("""<table class="linescore nohover stats_table no_freeze"> <tbody> <tr> <td class="center"></td> <td class="center"></td> </tr> <tr> <td class="center"></td> <td class="center"></td> </tr> </tbody> </table>"""))

        assert result == {'away': [None], 'home': [None]}

    @patch('requests.get', side_effect=mock_pyquery)
    def test_invalid_url_returns_none(self, *args, **kwargs):
        """``_retrieve_html_page('bad')`` returns None with ``requests.get`` patched."""
        result = Boxscore(None)._retrieve_html_page('bad')

        assert result is None

    def test_url_404_page_returns_none(self):
        """``_retrieve_html_page('404')`` returns None."""
        # NOTE(review): unlike the test above, requests.get is not patched on
        # this method - confirm this cannot reach the network.
        result = Boxscore(None)._retrieve_html_page('404')

        assert result is None

    def test_no_class_information_returns_dataframe_of_none(self):
        """``dataframe`` is None when both point totals are None."""
        mock_points = PropertyMock(return_value=None)
        type(self.boxscore)._home_points = mock_points
        type(self.boxscore)._away_points = mock_points

        assert self.boxscore.dataframe is None

    def test_empty_attribute_returns_none(self):
        """A None ``_away_rush_attempts`` surfaces as None on the public property."""
        fake_rushes = PropertyMock(return_value=None)
        type(self.boxscore)._away_rush_attempts = fake_rushes

        assert self.boxscore.away_rush_attempts is None

    def test_non_int_value_returns_none(self):
        """The value ``'bad'`` in ``_away_rush_attempts`` surfaces as None."""
        fake_rushes = PropertyMock(return_value='bad')
        type(self.boxscore)._away_rush_attempts = fake_rushes

        assert self.boxscore.away_rush_attempts is None

    def test_nfl_game_information(self):
        """Date, time, stadium, attendance and duration are read from the info text."""
        fields = {
            'attendance': 62881,
            'date': 'Thursday Nov 8, 2018',
            'duration': '2:49',
            'stadium': 'Heinz Field',
            'time': '8:20pm'
        }

        # NOTE(review): this literal appears flattened onto one line in this
        # view; the parser may expect one field per line - confirm against
        # the upstream fixture before relying on it.
        mock_field = """Thursday Nov 8, 2018 Start Time: 8:20pm Stadium: Heinz Field Attendance: 62,881 Time of Game: 2:49 Logos via Sports Logos.net / About logos """

        m = MockBoxscoreData(MockField(mock_field))

        self.boxscore._parse_game_date_and_location(m)
        for field, value in fields.items():
            assert getattr(self.boxscore, field) == value

    def test_nfl_game_limited_information(self):
        """With no start time or duration in the text, ``time`` and ``duration`` are None."""
        fields = {
            'attendance': 22000,
            'date': 'Sunday Sep 8, 1940',
            'duration': None,
            'stadium': 'Forbes Field',
            'time': None
        }

        # NOTE(review): literal appears flattened onto one line in this view;
        # original line breaks may matter to the parser - confirm.
        mock_field = """Sunday Sep 8, 1940 Stadium: Forbes Field Attendance: 22,000 Logos via Sports Logos.net / About logos """

        m = MockBoxscoreData(MockField(mock_field))

        self.boxscore._parse_game_date_and_location(m)
        for field, value in fields.items():
            assert getattr(self.boxscore, field) == value

    def test_nfl_away_abbreviation(self):
        """``away_abbreviation`` is taken from the team link (``'kan'`` here)."""
        # NOTE(review): the backslash was presumably a line continuation
        # inside the literal before this text was flattened - confirm.
        away_name = PropertyMock(return_value='<a href="/teams/kan/2018.htm" \ itemprop="name">Kansas City Chiefs</a>')
        type(self.boxscore)._away_name = away_name

        assert self.boxscore.away_abbreviation == 'kan'

    def test_nfl_home_abbreviation(self):
        """``home_abbreviation`` is taken from the team link (``'nwe'`` here)."""
        # NOTE(review): the backslash was presumably a line continuation
        # inside the literal before this text was flattened - confirm.
        home_name = PropertyMock(return_value='<a href="/teams/nwe/2018.htm" \ itemprop="name">New England Patriots</a>')
        type(self.boxscore)._home_name = home_name

        assert self.boxscore.home_abbreviation == 'nwe'

    def test_nfl_datetime_missing_time(self):
        """With ``_time`` None, ``datetime`` is the date alone (midnight)."""
        date = PropertyMock(return_value='Sunday Oct 7, 2018')
        time = PropertyMock(return_value=None)
        type(self.boxscore)._date = date
        type(self.boxscore)._time = time

        assert self.boxscore.datetime == datetime(2018, 10, 7)

    def test_nfl_game_details(self):
        """Toss, roof, surface, weather, line and over/under are read from ``game_info``."""
        # Expected 'roof' and 'surface' are capitalised although the markup
        # holds them in lower case ('outdoors', 'fieldturf ').
        fields = {
            'won_toss': 'Dolphins',
            'roof': 'Outdoors',
            'surface': 'Fieldturf',
            'weather': '87 degrees, wind 4 mph',
            'vegas_line': 'Cincinnati Bengals -6.5',
            'over_under': '47.5 (under)'
        }

        mock_field = """<table id="game_info"> <tr><th data-stat="info">Won Toss</th><td data-stat="stat">Dolphins</td></tr> <tr><th data-stat="info">Roof</th><td data-stat="stat">outdoors</td></tr> <tr><th data-stat="info">Surface</th><td data-stat="stat">fieldturf </td></tr> <tr><th data-stat="info">Duration</th><td data-stat="stat">3:02</td></tr> <tr><th data-stat="info">Attendance</th><td data-stat="stat">52,708</td></tr> <tr><th data-stat="info">Weather</th> <td data-stat="stat">87 degrees, wind 4 mph</td></tr> <tr><th data-stat="info">Vegas Line</th> <td data-stat="stat">Cincinnati Bengals -6.5</td></tr> <tr><th data-stat="info">Over/Under</th> <td data-stat="stat">47.5 <b>(under)</b></td></tr> </table> """

        self.boxscore._parse_game_details(pq(mock_field))

        for field, value in fields.items():
            assert getattr(self.boxscore, field) == value

    def test_finding_home_team_with_no_abbrs(self):
        """A team cell equal to ``home_abbreviation`` resolves to HOME."""
        mock_html = pq('<td data-stat="team">KAN</td>')
        abbr = PropertyMock(return_value='KAN')
        # Both cached abbreviations are cleared before the lookup.
        self.boxscore._home_abbr = None
        self.boxscore._away_abbr = None
        type(self.boxscore).home_abbreviation = abbr
        team = self.boxscore._find_home_or_away(mock_html)

        assert team == HOME

    def test_finding_away_team_with_no_abbrs(self):
        """A team cell that differs from ``home_abbreviation`` resolves to AWAY."""
        mock_html = pq('<td data-stat="team">HTX</td>')
        abbr = PropertyMock(return_value='KAN')
        self.boxscore._home_abbr = None
        self.boxscore._away_abbr = None
        type(self.boxscore).home_abbreviation = abbr
        team = self.boxscore._find_home_or_away(mock_html)

        assert team == AWAY

    def test_missing_abbreviations(self):
        """A ``team_stats`` table with an empty header yields ``(None, None)``."""
        table = '<table id="team_stats"><thead></thead></table>'
        output = self.boxscore._alt_abbreviations(pq(table))

        assert output == (None, None)
def test_url_404_page_returns_none(self):
    """``_retrieve_html_page('404')`` on a ``Boxscore(None)`` returns None."""
    assert Boxscore(None)._retrieve_html_page('404') is None
def test_invalid_url_returns_none(self, *args, **kwargs):
    """``_retrieve_html_page('bad')`` on a ``Boxscore(None)`` returns None."""
    assert Boxscore(None)._retrieve_html_page('bad') is None
'interceptions', 'yards_returned_from_interception', 'interceptions_returned_for_touchdown', 'longest_interception_return', 'passes_defended', 'sacks', 'combined_tackles', 'solo_tackles', 'assists_on_tackles', 'tackles_for_loss', 'quarterback_hits', 'fumbles_recovered', 'yards_recovered_from_fumble', 'fumbles_recovered_for_touchdown', 'fumbles_forced', 'kickoff_returns', 'kickoff_return_yards', 'average_kickoff_return_yards', 'kickoff_return_touchdown', 'longest_kickoff_return', 'punt_returns', 'punt_return_yards', 'yards_per_punt_return', 'punt_return_touchdown', 'longest_punt_return', 'extra_points_made', 'extra_points_attempted', 'field_goals_made', 'field_goals_attempted', 'punts', 'total_punt_yards', 'yards_per_punt', 'longest_punt' ]) for x in sb_app.keys(): game_data = Boxscore(x) home_df = game_data.home_players[0].dataframe for player in game_data.home_players[1:]: home_df = pd.concat([home_df, player.dataframe], axis=0) home_df['name'] = [x.name for x in game_data.home_players] home_df['team'] = game_data.home_abbreviation home_df['season'] = sb_app[x] away_df = game_data.away_players[0].dataframe for player in game_data.away_players[1:]: away_df = pd.concat([away_df, player.dataframe], axis=0) away_df['name'] = [x.name for x in game_data.away_players] away_df['team'] = game_data.away_abbreviation away_df['season'] = sb_app[x]
def get_nfl_player_performance(season, cred='credentials.json'):
    """Upload per-player boxscore stats for every team's home games in a season.

    Iterates over all teams of ``season`` and over each team's schedule,
    builds the boxscore id of every scheduled game and, where boxscore data
    exists, appends one row per home and away player to the
    ``fact_nfl_performance`` table of the ``nfl`` MySQL database on localhost.

    :param season: int year the season started in. Games dated January or
        February are assigned to ``season + 1`` when building the boxscore id.
    :param cred: path to a JSON file with ``userId`` and ``password`` keys.
    :return: None. Progress and skipped rows are reported via ``print``.
    """
    from urllib.parse import quote

    # The context manager closes the credentials file even if parsing fails.
    with open(cred) as cred_file:
        login = json.load(cred_file)

    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot corrupt the connection URL.
    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            quote(str(login['userId']), safe=''),
            quote(str(login['password']), safe='')))

    # Loop-invariant, so built once rather than once per team.
    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12',
    }

    try:
        # Iterating through teams
        for team in Teams.Teams(year=season):
            team_id = team.abbreviation.upper()
            schedule = Team(team_name=team_id, year=season).schedule

            # Iterating through the schedule of each one of those teams
            for game in schedule:
                # str(game) is expected to start with "<Month name> <day>".
                game_split = str(game).split(' ')
                month_name = game_split[0]

                # January/February games spill over into the next year.
                if month_name in ('January', 'February'):
                    game_year = season + 1
                else:
                    game_year = season

                game_id = (str(game_year) + months_dict[month_name]
                           + game_split[1].zfill(2) + '0' + team_id.lower())
                boxscore = Boxscore(uri=game_id)

                # Boxscore urls are only generated for home games. The
                # original check compared against the string 'None'
                # (presumably how the library renders a missing team -
                # confirm); a real None is treated the same way.
                if boxscore.home_abbreviation in (None, 'None'):
                    print(
                        'Game data does not exist. Game is either away game for {} or is yet to be played.'
                        .format(team))
                    continue

                print("Inputting {} for team {}".format(game_id, team_id))

                # Home players belong to the team whose schedule is walked.
                _insert_player_rows(
                    boxscore.home_players, game_id, team_id, engine)

                # Away players are stored and reported under the away
                # team's abbreviation. The success message previously
                # printed the home team id for them.
                _insert_player_rows(
                    boxscore.away_players, game_id,
                    str(boxscore.away_abbreviation).upper(), engine)
    finally:
        # Release pooled connections whether or not the upload succeeded.
        engine.dispose()


def _insert_player_rows(players, game_id, team_id, engine):
    """Append one ``fact_nfl_performance`` row per player, reporting each outcome."""
    for player in players:
        player_df = player.dataframe
        player_df['game_id'] = game_id
        player_df['team_id'] = team_id
        # The first index label of the player's dataframe is stored as id.
        player_df['player_id'] = player_df.index[0]
        try:
            player_df.to_sql('fact_nfl_performance',
                             con=engine,
                             index=False,
                             if_exists='append')
            print('Inputted: {} {} for game {}.'.format(
                player.name, team_id, game_id))
        except sql.exc.IntegrityError:
            # Duplicate rows are reported, not raised.
            print(
                'Player {} performance for {} already present in performance table.'
                .format(player.name, game_id))
        except AttributeError:
            print('Exception: Empty String')