Example #1
0
    def test_nfl_boxscore_string_representation(self):
        """The repr of a parsed boxscore names both teams and the game date."""
        boxscore = Boxscore(BOXSCORE)

        expected = ('Boxscore for Houston Texans at Kansas City Chiefs '
                    '(Thursday Sep 10, 2020)')
        actual = boxscore.__repr__()

        assert actual == expected
Example #2
0
    def test_nfl_boxscore_players(self):
        """Check the roster size of each side and that every player has data."""
        boxscore = Boxscore(BOXSCORE)

        assert len(boxscore.home_players) == 33
        assert len(boxscore.away_players) == 28

        # Home roster first, then away, one attribute lookup at a time.
        for side in ('home_players', 'away_players'):
            for player in getattr(boxscore, side):
                assert not player.dataframe.empty
Example #3
0
    def test_invalid_url_yields_empty_class(self):
        """With page retrieval stubbed to None, all fields but _uri are None."""
        page_stub = flexmock(Boxscore).should_receive('_retrieve_html_page')
        page_stub.and_return(None)

        boxscore = Boxscore(BOXSCORE)

        for key, value in vars(boxscore).items():
            # The URI is set from the constructor argument, so skip it.
            if key != '_uri':
                assert value is None
Example #4
0
    def test_game_summary_with_no_scores_returns_none(self):
        result = Boxscore(None)._parse_summary(
            pq("""<table class="linescore nohover stats_table no_freeze">
    <tbody>
        <tr>
            <td class="center"></td>
            <td class="center"></td>
        </tr>
        <tr>
            <td class="center"></td>
            <td class="center"></td>
        </tr>
    </tbody>
</table>"""))

        assert result == {'away': [None], 'home': [None]}
Example #5
0
def game_data_from_year(year):
    """Load the box score data for every game in a certain year.

    Parameters
    ----------
    year : int
        The year to get the data from

    Returns
    -------
    DataFrame or int
        A DataFrame containing data for all the games in the year, with a
        ``week`` column holding the week number of each game. If loading
        fails, an error message is printed and ``-1`` is returned instead.
    """
    # Weeks
    FIRST_WEEK = 1
    LAST_WEEK = 21

    try:
        # One DataFrame per week; combined once at the end
        weekly_frames = []

        # Retrieve data for each week in the year
        for week in range(FIRST_WEEK, LAST_WEEK + 1):
            # Build the week's schedule once and reuse it for every game,
            # instead of re-creating Boxscores for each lookup
            games = Boxscores(week, year).games["{}-{}".format(week, year)]

            # Box score stats for each game in the week; games without a
            # dataframe are dropped (pd.concat ignores None entries anyway)
            box_scores = [
                Boxscore(game['boxscore']).dataframe for game in games
            ]
            box_scores = [frame for frame in box_scores if frame is not None]
            if not box_scores:
                continue

            week_data = pd.concat(box_scores)

            # A scalar is broadcast to every row, so the column always
            # matches the number of rows actually loaded for the week
            week_data["week"] = week

            weekly_frames.append(week_data)

        # Combine the data of all weeks into the game data for the year
        if weekly_frames:
            game_data = pd.concat(weekly_frames)
        else:
            game_data = pd.DataFrame()
    except Exception:
        # Keep the original best-effort contract (message + -1), but let
        # KeyboardInterrupt / SystemExit propagate
        print("ERROR: Data loading failed")
        return -1
    else:
        return game_data
Example #6
0
    def setup_method(self, *args, **kwargs):
        """Prepare the expected field values and the Boxscore under test.

        Stores the expected values in ``self.results``, stubs
        ``utils._todays_date`` to return ``MockDateTime(YEAR, MONTH)``, and
        builds ``self.boxscore`` from ``BOXSCORE``.
        """
        self.results = {
            # Game-level information
            'date': 'Thursday Sep 10, 2020',
            'time': '8:20pm',
            'datetime': datetime(2020, 9, 10, 20, 20),
            'stadium': 'Arrowhead Stadium',
            'attendance': 15895,
            'duration': '2:53',
            'winner': HOME,
            'winning_name': 'Kansas City Chiefs',
            'winning_abbr': 'KAN',
            'losing_name': 'Houston Texans',
            'losing_abbr': 'HTX',
            'won_toss': 'Chiefs (deferred)',
            'weather': '56 degrees, relative humidity 95%, wind 7 mph',
            'vegas_line': 'Kansas City Chiefs -9.5',
            'surface': 'Astroturf',
            'roof': 'Outdoors',
            'over_under': '53.5 (over)',
            # Away team stats
            'away_points': 20,
            'away_first_downs': 21,
            'away_rush_attempts': 22,
            'away_rush_yards': 118,
            'away_rush_touchdowns': 2,
            'away_pass_completions': 20,
            'away_pass_attempts': 32,
            'away_pass_yards': 253,
            'away_pass_touchdowns': 1,
            'away_interceptions': 1,
            'away_times_sacked': 4,
            'away_yards_lost_from_sacks': 11,
            'away_net_pass_yards': 242,
            'away_total_yards': 360,
            'away_fumbles': 0,
            'away_fumbles_lost': 0,
            'away_turnovers': 1,
            'away_penalties': 5,
            'away_yards_from_penalties': 37,
            'away_third_down_conversions': 4,
            'away_third_down_attempts': 10,
            'away_fourth_down_conversions': 1,
            'away_fourth_down_attempts': 1,
            'away_time_of_possession': '25:13',
            # Home team stats
            'home_points': 34,
            'home_first_downs': 28,
            'home_rush_attempts': 34,
            'home_rush_yards': 166,
            'home_rush_touchdowns': 1,
            'home_pass_completions': 24,
            'home_pass_attempts': 32,
            'home_pass_yards': 211,
            'home_pass_touchdowns': 3,
            'home_interceptions': 0,
            'home_times_sacked': 1,
            'home_yards_lost_from_sacks': 8,
            'home_net_pass_yards': 203,
            'home_total_yards': 369,
            'home_fumbles': 0,
            'home_fumbles_lost': 0,
            'home_turnovers': 0,
            'home_penalties': 1,
            'home_yards_from_penalties': 5,
            'home_third_down_conversions': 7,
            'home_third_down_attempts': 13,
            'home_fourth_down_conversions': 1,
            'home_fourth_down_attempts': 1,
            'home_time_of_possession': '34:47',
        }
        # Replace the date lookup with a fixed mock value before the
        # Boxscore is constructed.
        flexmock(utils) \
            .should_receive('_todays_date') \
            .and_return(MockDateTime(YEAR, MONTH))

        self.boxscore = Boxscore(BOXSCORE)
def retrieve_schedule(season, team, first_game, cred='credentials.json'):
    """Insert the played home games of one team's season into the NFL DB.

    For every game on the team's schedule a boxscore is looked up under the
    team's home-game id. Games with boxscore data are appended to the
    ``dim_nfl_games`` table of the local MySQL ``nfl`` database; games
    without data are reported and skipped.

    season: int of year the season started in, takes into account spillover
        into Jan/Feb
    team: Used to specify which team will be retrieved
    first_game: YYYY/MM/DD date in which the first game of the season is
        played, used for week calculation
    cred: path to a JSON file holding the 'userId' and 'password' used to
        connect to the database
    :return : None, uploads teams (home and away), final score, week, season,
        and date to games table
    """

    # Reading the credentials; the context manager closes the file even if
    # the JSON cannot be parsed
    with open(cred) as f:
        login = json.load(f)

    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            str(login['userId']), str(login['password'])))

    # Establishing some necessary objects for iteration
    schedule = Team(team_name=team, year=season).schedule
    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12'
    }
    # Parsed once instead of twice per game
    season_start = dt.datetime.strptime(first_game, '%Y/%m/%d')

    # Iterating through each game and adding home games to the schedule
    for game in schedule:

        # relies on str(game) starting with '<month name> <day>'
        game_split = str(game).split(' ')
        month_name, day = game_split[0], game_split[1]

        # handling for Jan/Feb spillover into the next year
        if month_name in ('January', 'February'):
            game_year = season + 1
        else:
            game_year = season

        # creating the necessary id and retrieving box score
        game_id = (str(game_year) + months_dict[month_name] + day.zfill(2)
                   + '0' + team.lower())
        boxscore = Boxscore(uri=game_id)

        # boxscore url's are only generated for home games, so handling for such exceptions
        if boxscore.home_abbreviation == 'None':
            print(
                'Game data does not exist. Game is either away game for {} or is yet to be played.'
                .format(team))
            continue

        home = str(boxscore.home_abbreviation)
        away = str(boxscore.away_abbreviation)
        game_date = dt.date.strftime(boxscore.datetime, '%Y-%m-%d')
        # Whole weeks elapsed since the first game, counted from week 1
        week_no = ((boxscore.datetime - season_start).days // 7) + 1

        # Unique index already created to avoid duplicates in table when updating, error handling for this
        try:
            game_data = {
                'game_id': game_id,
                'home': home.upper(),
                'away': away.upper(),
                'home_score': boxscore.home_points,
                'away_score': boxscore.away_points,
                'season': season,
                'week_no': week_no,
                'game_date': game_date
            }

            df = pd.DataFrame(game_data, index=[0])

            df.to_sql('dim_nfl_games',
                      con=engine,
                      index=False,
                      if_exists='append')

            print('Game inputted: {} vs. {} on {} (Week {})'.format(
                home, away, game_date, week_no))

        except sql.exc.IntegrityError:
            print('Game: {} vs. {} on {} already present in table.'.format(
                home, away, game_date))

    print('Schedule inputted: {} for {}'.format(team, season))
Example #8
0
 def boxscore(self):
     """
     Builds a Boxscore instance from this object's ``_boxscore`` value and
     returns it, giving access to more detailed stats on the game.
     """
     detailed_stats = Boxscore(self._boxscore)
     return detailed_stats
Example #9
0
    def setup_method(self, *args, **kwargs):
        """Stub out game-data parsing and create a Boxscore with no URI."""
        parse_stub = flexmock(Boxscore).should_receive('_parse_game_data')
        parse_stub.and_return(None)

        self.boxscore = Boxscore(None)
Example #10
0
    def test_url_404_page_returns_none(self):
        """Retrieving the page for the '404' URI yields None."""
        boxscore = Boxscore(None)

        assert boxscore._retrieve_html_page('404') is None
Example #11
0
    def test_invalid_url_returns_none(self, *args, **kwargs):
        """Retrieving the page for the 'bad' URI yields None."""
        boxscore = Boxscore(None)

        assert boxscore._retrieve_html_page('bad') is None
    'interceptions', 'yards_returned_from_interception',
    'interceptions_returned_for_touchdown', 'longest_interception_return',
    'passes_defended', 'sacks', 'combined_tackles', 'solo_tackles',
    'assists_on_tackles', 'tackles_for_loss', 'quarterback_hits',
    'fumbles_recovered', 'yards_recovered_from_fumble',
    'fumbles_recovered_for_touchdown', 'fumbles_forced', 'kickoff_returns',
    'kickoff_return_yards', 'average_kickoff_return_yards',
    'kickoff_return_touchdown', 'longest_kickoff_return', 'punt_returns',
    'punt_return_yards', 'yards_per_punt_return', 'punt_return_touchdown',
    'longest_punt_return', 'extra_points_made', 'extra_points_attempted',
    'field_goals_made', 'field_goals_attempted', 'punts', 'total_punt_yards',
    'yards_per_punt', 'longest_punt'
])

# For every boxscore URI in sb_app, build one stats frame per side holding a
# row per player, tagged with the player's name, the team abbreviation and
# the value stored in sb_app for that URI.
for x in sb_app.keys():
    game_data = Boxscore(x)

    # A single concat over all players avoids re-copying the growing frame
    # once per player.
    home_df = pd.concat(
        [player.dataframe for player in game_data.home_players], axis=0)
    home_df['name'] = [player.name for player in game_data.home_players]
    home_df['team'] = game_data.home_abbreviation
    home_df['season'] = sb_app[x]

    away_df = pd.concat(
        [player.dataframe for player in game_data.away_players], axis=0)
    away_df['name'] = [player.name for player in game_data.away_players]
    away_df['team'] = game_data.away_abbreviation
    away_df['season'] = sb_app[x]
def get_nfl_player_performance(season, cred='credentials.json'):
    """Insert per-player boxscore stats for a season into the NFL DB.

    Walks the schedule of every team in the season, looks up the boxscore
    under the team's home-game id, and appends one row per home and away
    player to the ``fact_nfl_performance`` table of the local MySQL ``nfl``
    database. Games without boxscore data are reported and skipped.

    season: int of year the season started in, takes into account spillover
        into Jan/Feb
    cred: path to a JSON file holding the 'userId' and 'password' used to
        connect to the database
    :return : None
    """
    # Reading the credentials; the context manager closes the file even if
    # the JSON cannot be parsed
    with open(cred) as f:
        login = json.load(f)

    engine = sql.create_engine(
        "mysql+pymysql://{}:{}@localhost:3306/nfl".format(
            str(login['userId']), str(login['password'])))

    # Built once instead of once per team
    months_dict = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12'
    }

    def _input_players(players, player_team_id, game_id):
        """Append each player's stats row, tagged with game and team ids."""
        for player in players:
            player_df = player.dataframe

            player_df['game_id'] = game_id
            player_df['team_id'] = player_team_id
            player_df['player_id'] = player_df.index[0]

            try:
                player_df.to_sql('fact_nfl_performance',
                                 con=engine,
                                 index=False,
                                 if_exists='append')
                # Report the team the row was stored under (the original
                # printed the home team id for away players as well)
                print('Inputted: {} {} for game {}.'.format(
                    player.name, player_team_id, game_id))

            except sql.exc.IntegrityError:
                print(
                    'Player {} performance for {} already present in performance table.'
                    .format(player.name, game_id))

            except AttributeError:
                print('Exception: Empty String')

    # Iterating through teams
    teams = Teams.Teams(year=season)

    for team in teams:
        team_id = team.abbreviation.upper()

        schedule = Team(team_name=team_id, year=season).schedule

        # Iterating through the schedule of each one of those teams
        for game in schedule:
            # relies on str(game) starting with '<month name> <day>'
            game_split = str(game).split(' ')
            month_name, day = game_split[0], game_split[1]

            # handling for Jan/Feb spillover into the next year
            if month_name in ('January', 'February'):
                game_year = season + 1
            else:
                game_year = season

            game_id = (str(game_year) + months_dict[month_name]
                       + day.zfill(2) + '0' + team_id.lower())
            boxscore = Boxscore(uri=game_id)

            # boxscore url's are only generated for home games, so handling for such exceptions
            if boxscore.home_abbreviation == 'None':
                print(
                    'Game data does not exist. Game is either away game for {} or is yet to be played.'
                    .format(team))
                continue

            print("Inputting {} for team {}".format(game_id, team_id))

            # inputting home player performance
            _input_players(boxscore.home_players, team_id, game_id)

            # inputting away player performance
            _input_players(boxscore.away_players,
                           str(boxscore.away_abbreviation).upper(), game_id)