def test_nfl_boxscore_string_representation(self):
    """Check the text returned by ``Boxscore.__repr__`` for the sample game."""
    game = Boxscore(BOXSCORE)
    wanted_repr = (
        'Boxscore for Houston Texans at Kansas City Chiefs '
        '(Thursday Sep 10, 2020)'
    )

    assert game.__repr__() == wanted_repr
def test_nfl_boxscore_players(self):
    """Check both roster sizes and that every player has a non-empty dataframe."""
    game = Boxscore(BOXSCORE)
    home_roster = game.home_players
    away_roster = game.away_players

    assert len(home_roster) == 33
    assert len(away_roster) == 28

    # Home players are checked first, then away players.
    for roster in (home_roster, away_roster):
        for member in roster:
            assert not member.dataframe.empty
def test_invalid_url_yields_empty_class(self):
    """With no page retrieved, every attribute except ``_uri`` must be None."""
    # Make page retrieval return nothing for any Boxscore instance.
    patched = flexmock(Boxscore)
    patched.should_receive('_retrieve_html_page').and_return(None)

    game = Boxscore(BOXSCORE)

    for attr_name, attr_value in game.__dict__.items():
        # '_uri' is the one attribute exempt from the None check.
        if key_is_checked := (attr_name != '_uri'):
            assert attr_value is None
def test_game_summary_with_no_scores_returns_none(self):
    """A linescore table with only empty cells parses to ``[None]`` per team."""
    empty_linescore = pq("""<table class="linescore nohover stats_table no_freeze"> <tbody> <tr> <td class="center"></td> <td class="center"></td> </tr> <tr> <td class="center"></td> <td class="center"></td> </tr> </tbody> </table>""")
    wanted = {'away': [None], 'home': [None]}

    summary = Boxscore(None)._parse_summary(empty_linescore)

    assert summary == wanted
def game_data_from_year(year):
    """Load the data from a certain year.

    Box score dataframes are collected week by week (weeks 1 through 21)
    and tagged with a ``week`` column holding the week number.

    Parameters
    ----------
    year : int
        The year to get the data from

    Returns
    -------
    DataFrame or int
        A DataFrame containing data for all the games in the year, or
        ``-1`` (after printing an error message) if anything went wrong
        while loading.
    """
    # Weeks
    FIRST_WEEK = 1
    LAST_WEEK = 21

    # Game data
    game_data = pd.DataFrame()

    try:
        # Retrieve data for each week in the year
        for week in range(FIRST_WEEK, LAST_WEEK + 1):
            # Build the week's schedule once; the list of games does not
            # change while the week is being processed, so there is no
            # reason to construct Boxscores again for every game.
            games = Boxscores(week, year).games[str(week) + "-" + str(year)]

            # Week data
            week_data = pd.DataFrame()

            # Retrieve data for each game in the week
            for game in games:
                # Box score stats
                box_score = Boxscore(game['boxscore']).dataframe

                # Add box score stats for the game to the data for
                # the week
                week_data = pd.concat([week_data, box_score])

            # Tag every row with the current week. A scalar is broadcast
            # to however many rows were actually collected, so a game
            # without a dataframe no longer causes a length mismatch.
            week_data["week"] = week

            # Add the data for the week to the game data for
            # the year
            game_data = pd.concat([game_data, week_data])
    except Exception:
        # Exception (not a bare except) so that Ctrl-C and interpreter
        # shutdown are not swallowed and reported as a loading failure.
        print("ERROR: Data loading failed")
        return -1
    else:
        return game_data
def setup_method(self, *args, **kwargs):
    """Prepare the expected values and the Boxscore instance under test.

    ``self.results`` holds the expected attribute values for the sample
    game; ``self.boxscore`` is built after ``utils._todays_date`` has been
    replaced with a fixed mock date.
    """
    # General information about the game.
    game_info = {
        'date': 'Thursday Sep 10, 2020',
        'time': '8:20pm',
        'datetime': datetime(2020, 9, 10, 20, 20),
        'stadium': 'Arrowhead Stadium',
        'attendance': 15895,
        'duration': '2:53',
        'winner': HOME,
        'winning_name': 'Kansas City Chiefs',
        'winning_abbr': 'KAN',
        'losing_name': 'Houston Texans',
        'losing_abbr': 'HTX',
        'won_toss': 'Chiefs (deferred)',
        'weather': '56 degrees, relative humidity 95%, wind 7 mph',
        'vegas_line': 'Kansas City Chiefs -9.5',
        'surface': 'Astroturf',
        'roof': 'Outdoors',
        'over_under': '53.5 (over)',
    }

    # Team totals for the away side.
    away_totals = {
        'away_points': 20,
        'away_first_downs': 21,
        'away_rush_attempts': 22,
        'away_rush_yards': 118,
        'away_rush_touchdowns': 2,
        'away_pass_completions': 20,
        'away_pass_attempts': 32,
        'away_pass_yards': 253,
        'away_pass_touchdowns': 1,
        'away_interceptions': 1,
        'away_times_sacked': 4,
        'away_yards_lost_from_sacks': 11,
        'away_net_pass_yards': 242,
        'away_total_yards': 360,
        'away_fumbles': 0,
        'away_fumbles_lost': 0,
        'away_turnovers': 1,
        'away_penalties': 5,
        'away_yards_from_penalties': 37,
        'away_third_down_conversions': 4,
        'away_third_down_attempts': 10,
        'away_fourth_down_conversions': 1,
        'away_fourth_down_attempts': 1,
        'away_time_of_possession': '25:13',
    }

    # Team totals for the home side.
    home_totals = {
        'home_points': 34,
        'home_first_downs': 28,
        'home_rush_attempts': 34,
        'home_rush_yards': 166,
        'home_rush_touchdowns': 1,
        'home_pass_completions': 24,
        'home_pass_attempts': 32,
        'home_pass_yards': 211,
        'home_pass_touchdowns': 3,
        'home_interceptions': 0,
        'home_times_sacked': 1,
        'home_yards_lost_from_sacks': 8,
        'home_net_pass_yards': 203,
        'home_total_yards': 369,
        'home_fumbles': 0,
        'home_fumbles_lost': 0,
        'home_turnovers': 0,
        'home_penalties': 1,
        'home_yards_from_penalties': 5,
        'home_third_down_conversions': 7,
        'home_third_down_attempts': 13,
        'home_fourth_down_conversions': 1,
        'home_fourth_down_attempts': 1,
        'home_time_of_possession': '34:47',
    }

    # Merge the sections in their original order: info, away, home.
    expected = {}
    for section in (game_info, away_totals, home_totals):
        expected.update(section)
    self.results = expected

    # Pin "today" before the Boxscore is constructed.
    fixed_today = MockDateTime(YEAR, MONTH)
    flexmock(utils).should_receive('_todays_date').and_return(fixed_today)

    self.boxscore = Boxscore(BOXSCORE)
def retrieve_schedule(season, team, first_game, cred='credentials.json'):
    """Insert one team's played home games for a season into the NFL DB.

    For every entry in the team's schedule a box score is looked up by a
    game id built from the date and the team abbreviation. Games for which
    a box score exists are appended to the ``dim_nfl_games`` table of the
    ``nfl`` database on the local MySQL server.

    season: int of year the season started in, takes into account
        spillover into Jan/Feb
    team: Used to specify which team will be retrieved
    first_game: YYYY/MM/DD date in which the first game of the season is
        played, used for week calculation
    cred: path of a JSON file providing the ``userId`` and ``password``
        used to connect to the database
    :return : None, uploads teams (home and away), final score, week,
        season, and date to games table
    """
    # Read the login data; the context manager closes the file even when
    # the JSON cannot be parsed.
    with open(cred) as f:
        login = json.load(f)
    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            str(login['userId']), str(login['password'])))

    # Establishing some necessary objects for iteration
    schedule = Team(team_name=team, year=season).schedule
    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12'
    }

    # Iterating through each game and adding home games to the schedule
    for game in schedule:
        # The text form of a schedule entry starts with "<Month> <day>".
        game_split = str(game).split(' ')
        month, day = game_split[0], game_split[1]

        # handling for Jan/Feb spillover into the next year
        game_year = season + 1 if month in ('January', 'February') else season
        game_id = (str(game_year) + months_dict[month] + str(day.zfill(2))
                   + '0' + team.lower())
        boxscore = Boxscore(uri=game_id)

        # boxscore url's are only generated for home games, so handling for
        # such exceptions
        if boxscore.home_abbreviation == 'None':
            print(
                'Game data does not exist. Game is either away game for {} or is yet to be played.'
                .format(team))
            continue

        # Values shared by the inserted row and the log messages.
        home = str(boxscore.home_abbreviation)
        away = str(boxscore.away_abbreviation)
        game_date = dt.date.strftime(boxscore.datetime, '%Y-%m-%d')
        week_no = ((boxscore.datetime - dt.datetime.strptime(
            first_game, '%Y/%m/%d')).days // 7) + 1

        game_data = {
            'game_id': game_id,
            'home': home.upper(),
            'away': away.upper(),
            'home_score': boxscore.home_points,
            'away_score': boxscore.away_points,
            'season': season,
            'week_no': week_no,
            'game_date': game_date
        }

        # Unique index already created to avoid duplicates in table when
        # updating, error handling for this
        try:
            df = pd.DataFrame(game_data, index=[0])
            df.to_sql('dim_nfl_games',
                      con=engine,
                      index=False,
                      if_exists='append')
            print('Game inputted: {} vs. {} on {} (Week {})'.format(
                home, away, game_date, week_no))
        except sql.exc.IntegrityError:
            print('Game: {} vs. {} on {} already present in table.'.format(
                home, away, game_date))

    print('Schedule inputted: {} for {}'.format(team, season))
def boxscore(self):
    """
    Builds and returns a Boxscore instance from this object's stored
    ``_boxscore`` value, giving access to more detailed stats on the game.
    """
    boxscore_ref = self._boxscore
    detailed_game = Boxscore(boxscore_ref)
    return detailed_game
def setup_method(self, *args, **kwargs):
    """Create a Boxscore whose ``_parse_game_data`` is mocked to return None."""
    patched = flexmock(Boxscore)
    patched.should_receive('_parse_game_data').and_return(None)

    # Built with no URI.
    self.boxscore = Boxscore(None)
def test_url_404_page_returns_none(self):
    """``_retrieve_html_page('404')`` must return None."""
    game = Boxscore(None)
    page = game._retrieve_html_page('404')

    assert page is None
def test_invalid_url_returns_none(self, *args, **kwargs):
    """``_retrieve_html_page('bad')`` must return None."""
    game = Boxscore(None)
    page = game._retrieve_html_page('bad')

    assert page is None
'interceptions', 'yards_returned_from_interception', 'interceptions_returned_for_touchdown', 'longest_interception_return', 'passes_defended', 'sacks', 'combined_tackles', 'solo_tackles', 'assists_on_tackles', 'tackles_for_loss', 'quarterback_hits', 'fumbles_recovered', 'yards_recovered_from_fumble', 'fumbles_recovered_for_touchdown', 'fumbles_forced', 'kickoff_returns', 'kickoff_return_yards', 'average_kickoff_return_yards', 'kickoff_return_touchdown', 'longest_kickoff_return', 'punt_returns', 'punt_return_yards', 'yards_per_punt_return', 'punt_return_touchdown', 'longest_punt_return', 'extra_points_made', 'extra_points_attempted', 'field_goals_made', 'field_goals_attempted', 'punts', 'total_punt_yards', 'yards_per_punt', 'longest_punt' ]) for x in sb_app.keys(): game_data = Boxscore(x) home_df = game_data.home_players[0].dataframe for player in game_data.home_players[1:]: home_df = pd.concat([home_df, player.dataframe], axis=0) home_df['name'] = [x.name for x in game_data.home_players] home_df['team'] = game_data.home_abbreviation home_df['season'] = sb_app[x] away_df = game_data.away_players[0].dataframe for player in game_data.away_players[1:]: away_df = pd.concat([away_df, player.dataframe], axis=0) away_df['name'] = [x.name for x in game_data.away_players] away_df['team'] = game_data.away_abbreviation away_df['season'] = sb_app[x]
def get_nfl_player_performance(season, cred='credentials.json'):
    """Insert per-player box score rows for every team's home games.

    For each team of the season and each entry in its schedule, a box score
    is looked up by a game id built from the date and the team
    abbreviation. When the box score exists, the dataframe of every home
    and away player is appended to the ``fact_nfl_performance`` table of
    the ``nfl`` database on the local MySQL server, extended with
    ``game_id``, ``team_id`` and ``player_id`` columns.

    season: int of year the season started in; January/February games are
        looked up under ``season + 1``
    cred: path of a JSON file providing the ``userId`` and ``password``
        used to connect to the database
    :return : None, progress and skipped rows are reported with print
    """
    # Read the login data; the context manager closes the file even when
    # the JSON cannot be parsed.
    with open(cred) as f:
        login = json.load(f)
    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            str(login['userId']), str(login['password'])))

    # Month names as they appear in schedule entries, mapped to two-digit
    # numbers. Built once instead of once per team.
    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12'
    }

    # Iterating through teams
    teams = Teams.Teams(year=season)
    for team in teams:
        team_id = team.abbreviation.upper()
        schedule = Team(team_name=team_id, year=season).schedule

        # Iterating through the schedule of each one of those teams
        for game in schedule:
            # The text form of a schedule entry starts with "<Month> <day>".
            game_split = str(game).split(' ')
            month, day = game_split[0], game_split[1]

            # handling for Jan/Feb spillover into the next year
            game_year = (season + 1 if month in ('January', 'February')
                         else season)
            game_id = (str(game_year) + months_dict[month]
                       + str(day.zfill(2)) + '0' + team_id.lower())
            boxscore = Boxscore(uri=game_id)

            # boxscore url's are only generated for home games, so handling
            # for such exceptions
            if boxscore.home_abbreviation == 'None':
                print(
                    'Game data does not exist. Game is either away game for {} or is yet to be played.'
                    .format(team))
                continue

            print("Inputting {} for team {}".format(game_id, team_id))

            # Home players first, then away players, each paired with the
            # id of the team they played for in this game.
            rosters = (
                (boxscore.home_players, team_id),
                (boxscore.away_players,
                 str(boxscore.away_abbreviation).upper()),
            )
            for players, roster_team_id in rosters:
                for player in players:
                    player_df = player.dataframe
                    player_df['game_id'] = game_id
                    player_df['team_id'] = roster_team_id
                    # The first index label of the player's dataframe is
                    # stored as the player id.
                    player_df['player_id'] = player_df.index[0]
                    try:
                        player_df.to_sql('fact_nfl_performance',
                                         con=engine,
                                         index=False,
                                         if_exists='append')
                        # Report the team the player belongs to; away
                        # players were previously logged with the home
                        # team's id.
                        print('Inputted: {} {} for game {}.'.format(
                            player.name, roster_team_id, game_id))
                    except sql.exc.IntegrityError:
                        print(
                            'Player {} performance for {} already present in performance table.'
                            .format(player.name, game_id))
                    except AttributeError:
                        print('Exception: Empty String')