def _pull_schedule(self, abbreviation, year): """ Download and create objects for the team's schedule. Given a team abbreviation and season, first download the team's schedule page and convert to a PyQuery object, then create a Game instance for every game in the team's schedule and append it to the '_games' property. Parameters ---------- abbreviation : string A team's short name, such as 'PURDUE' for the Purdue Boilermakers. year : string The requested year to pull stats from. """ if not year: year = utils._find_year_for_season('ncaab') # If stats for the requested season do not exist yet (as is the # case right before a new season begins), attempt to pull the # previous year's stats. If it exists, use the previous year # instead. if not utils._url_exists(SCHEDULE_URL % (abbreviation.lower(), year)) and \ utils._url_exists(SCHEDULE_URL % (abbreviation.lower(), str(int(year) - 1))): year = str(int(year) - 1) doc = pq(SCHEDULE_URL % (abbreviation.lower(), year)) schedule = utils._get_stats_table(doc, 'table#schedule') for item in schedule: if 'class="thead"' in str(item): continue game = Game(item) self._games.append(game)
def _pull_schedule(self, abbreviation, year): """ Download and create objects for the team's schedule. Given a team abbreviation and season, first download the team's schedule page and convert to a PyQuery object, then create a Game instance for every game in the team's schedule and append it to the '_games' property. Parameters ---------- abbreviation : string A team's short name, such as 'NWE' for the New England Patriots. year : string The requested year to pull stats from. """ if not year: year = utils._find_year_for_season('nfl') # If stats for the requested season do not exist yet (as is the # case right before a new season begins), attempt to pull the # previous year's stats. If it exists, use the previous year # instead. if not utils._url_exists(SCHEDULE_URL % (abbreviation.lower(), year)) and \ utils._url_exists(SCHEDULE_URL % (abbreviation.lower(), str(int(year) - 1))): year = str(int(year) - 1) doc = pq(SCHEDULE_URL % (abbreviation.lower(), year)) schedule = utils._get_stats_table(doc, 'table#gamelog%s' % year) self._add_games_to_schedule(schedule, REGULAR_SEASON, year) if 'playoff_gamelog%s' % year in str(doc): playoffs = utils._get_stats_table(doc, 'table#playoff_gamelog%s' % year) self._add_games_to_schedule(playoffs, POST_SEASON, year)
def _pull_schedule(self, abbreviation, year): """ Download and create objects for the team's schedule. Given a team abbreviation and season, first download the team's schedule page and convert to a PyQuery object, then create a Game instance for every game in the team's schedule and append it to the '_games' property. Parameters ---------- abbreviation : string A team's short name, such as 'NYR' for the New York Rangers. year : string The requested year to pull stats from. """ if not year: year = utils._find_year_for_season('nhl') doc = pq(SCHEDULE_URL % (abbreviation, year)) schedule = utils._get_stats_table(doc, 'table#tm_gamelog_rs') for item in schedule: if 'class="thead"' in str(item): continue game = Game(item, year) self._games.append(game)
def _pull_schedule(self, abbreviation, year): """ Download and create objects for the team's schedule. Given a team abbreviation and season, first download the team's schedule page and convert to a PyQuery object, then create a Game instance for every game in the team's schedule and append it to the '_games' property. Parameters ---------- abbreviation : string A team's short name, such as 'MICHIGAN' for the Michigan Wolverines. year : string The requested year to pull stats from. """ if not year: year = utils._find_year_for_season('ncaaf') doc = pq(SCHEDULE_URL % (abbreviation.lower(), year)) schedule = utils._get_stats_table(doc, 'table#schedule') for item in schedule: game = Game(item) self._games.append(game)
def _pull_schedule(self, abbreviation, year): """ Download and create objects for the team's schedule. Given a team abbreviation and season, first download the team's schedule page and convert to a PyQuery object, then create a Game instance for every game in the team's schedule and append it to the '_games' property. Parameters ---------- abbreviation : string A team's short name, such as 'NWE' for the New England Patriots. year : string The requested year to pull stats from. """ if not year: year = utils._find_year_for_season('nfl') doc = pq(SCHEDULE_URL % (abbreviation.lower(), year)) schedule = utils._get_stats_table(doc, 'table#gamelog%s' % year) self._add_games_to_schedule(schedule, REGULAR_SEASON, year) if 'playoff_gamelog%s' % year in str(doc): playoffs = utils._get_stats_table(doc, 'table#playoff_gamelog%s' % year) self._add_games_to_schedule(playoffs, POST_SEASON, year)
def _pull_schedule(self, abbreviation, year): """ Download and create objects for the team's schedule. Given a team abbreviation and season, first download the team's schedule page and convert to a PyQuery object, then create a Game instance for every game in the team's schedule and append it to the '_games' property. Parameters ---------- abbreviation : string A team's short name, such as 'DET' for the Detroit Pistons. year : string The requested year to pull stats from. """ if not year: year = utils._find_year_for_season('nba') doc = pq(SCHEDULE_URL % (abbreviation, year)) schedule = utils._get_stats_table(doc, 'table#tgl_basic') self._add_games_to_schedule(schedule) if 'tgl_basic_playoffs' in str(doc): playoffs = utils._get_stats_table(doc, 'div#all_tgl_basic_playoffs') self._add_games_to_schedule(playoffs)
def _retrieve_all_teams(year, basic_stats=None, basic_opp_stats=None, adv_stats=None, adv_opp_stats=None): """ Find and create Team instances for all teams in the given season. For a given season, parses the specified NCAAB stats table and finds all requested stats. Each team then has a Team instance created which includes all requested stats and a few identifiers, such as the team's name and abbreviation. All of the individual Team instances are added to a list. Note that this method is called directly once Teams is invoked and does not need to be called manually. Parameters ---------- year : string The requested year to pull stats from. basic_stats : string (optional) Link with filename to the local basic stats page. basic_opp_stats : string (optional) Link with filename to the local basic opponent stats page. adv_stats : string (optional) Link with filename to the local advanved stats page. adv_opp_stats : string (optional) Link with filename to the local advanced opponents stats page. Returns ------- tuple Returns a ``tuple`` of the team_data_dict and year which represent all stats for all teams, and the given year that should be used to pull stats from, respectively. """ team_data_dict = {} if not year: year = utils._find_year_for_season('ncaab') # If stats for the requested season do not exist yet (as is the case # right before a new season begins), attempt to pull the previous # year's stats. If it exists, use the previous year instead. if not utils._url_exists(BASIC_STATS_URL % year) and \ utils._url_exists(BASIC_STATS_URL % str(int(year) - 1)): year = str(int(year) - 1) doc = utils._pull_page(BASIC_STATS_URL % year, basic_stats) teams_list = utils._get_stats_table(doc, 'table#basic_school_stats') doc = utils._pull_page(BASIC_OPPONENT_STATS_URL % year, basic_opp_stats) opp_list = utils._get_stats_table(doc, 'table#basic_opp_stats') doc = utils._pull_page(ADVANCED_STATS_URL % year, adv_stats) adv_teams_list = utils._get_stats_table(doc, 'table#adv_school_stats') doc = utils._pull_page(ADVANCED_OPPONENT_STATS_URL % year, adv_opp_stats) adv_opp_list = utils._get_stats_table(doc, 'table#adv_opp_stats') if not teams_list and not opp_list and not adv_teams_list \ and not adv_opp_list: utils._no_data_found() return None, None for stats_list in [teams_list, opp_list, adv_teams_list, adv_opp_list]: team_data_dict = _add_stats_data(stats_list, team_data_dict) return team_data_dict, year
def _retrieve_all_teams(year, season_file=None): """ Find and create Team instances for all teams in the given season. For a given season, parses the specified NBA stats table and finds all requested stats. Each team then has a Team instance created which includes all requested stats and a few identifiers, such as the team's name and abbreviation. All of the individual Team instances are added to a list. Parameters ---------- year : string The requested year to pull stats from. season_file : string (optional) Link with filename to the local season page. Returns ------- tuple Returns a ``tuple`` of the team_data_dict and year which represent all stats for all teams, and the given year that should be used to pull stats from, respectively. """ team_data_dict = {} if not year: year = utils._find_year_for_season('nba') # Given the delays to the NBA season in 2020, the default season # selection logic is no longer valid after the original season should # have concluded. In this case, the previous season should be pulled # instead. if year == 2021: try: doc = pq(SEASON_PAGE_URL % year) except HTTPError: year = str(int(year) - 1) # If stats for the requested season do not exist yet (as is the case # right before a new season begins), attempt to pull the previous # year's stats. If it exists, use the previous year instead. if not utils._url_exists(SEASON_PAGE_URL % year) and \ utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)): year = str(int(year) - 1) doc = utils._pull_page(SEASON_PAGE_URL % year, season_file) teams_list = utils._get_stats_table(doc, 'div#all_team-stats-base') opp_teams_list = utils._get_stats_table(doc, 'div#all_opponent-stats-base') if not teams_list and not opp_teams_list: utils._no_data_found() return None, None for stats_list in [teams_list, opp_teams_list]: team_data_dict = _add_stats_data(stats_list, team_data_dict) return team_data_dict, year
def _retrieve_all_teams(year, season_page, offensive_stats, defensive_stats): """ Find and create Team instances for all teams in the given season. For a given season, parses the specified NCAAF stats table and finds all requested stats. Each team then has a Team instance created which includes all requested stats and a few identifiers, such as the team's name and abbreviation. All of the individual Team instances are added to a list. Note that this method is called directly once Teams is invoked and does not need to be called manually. Parameters ---------- year : string The requested year to pull stats from. season_page : string (optional) Link with filename to the local season stats page. offensive_stats : string (optional) Link with filename to the local offensive stats page. defensive_stats : string (optional) Link with filename to the local defensive stats page. Returns ------- tuple Returns a ``tuple`` of the team_data_dict and year which represent all stats for all teams, and the given year that should be used to pull stats from, respectively. """ team_data_dict = {} if not year: year = utils._find_year_for_season('ncaaf') # If stats for the requested season do not exist yet (as is the case # right before a new season begins), attempt to pull the previous # year's stats. If it exists, use the previous year instead. if not utils._url_exists(SEASON_PAGE_URL % year) and \ utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)): year = str(int(year) - 1) doc = utils._pull_page(SEASON_PAGE_URL % year, season_page) teams_list = utils._get_stats_table(doc, 'div#div_standings') offense_doc = utils._pull_page(OFFENSIVE_STATS_URL % year, offensive_stats) offense_list = utils._get_stats_table(offense_doc, 'table#offense') defense_doc = utils._pull_page(DEFENSIVE_STATS_URL % year, defensive_stats) defense_list = utils._get_stats_table(defense_doc, 'table#defense') if not teams_list and not offense_list and not defense_list: utils._no_data_found() for stats_list in [teams_list, offense_list, defense_list]: team_data_dict = _add_stats_data(stats_list, team_data_dict) return team_data_dict, year
def _retrieve_all_teams(year, standings_file=None, teams_file=None): """ Find and create Team instances for all teams in the given season. For a given season, parses the specified MLB stats table and finds all requested stats. Each team then has a Team instance created which includes all requested stats and a few identifiers, such as the team's name and abbreviation. All of the individual Team instances are added to a list. Parameters ---------- year : string The requested year to pull stats from. standings_file : string (optional) Link with filename to the local standings page. teams_file : string (optional) Link with filename to the local teams page. Returns ------- tuple Returns a ``tuple`` of the team_data_dict and year which represent all stats for all teams, and the given year that should be used to pull stats from, respectively. """ team_data_dict = {} if not year: year = utils._find_year_for_season('mlb') # If stats for the requested season do not exist yet (as is the case # right before a new season begins), attempt to pull the previous # year's stats. If it exists, use the previous year instead. if not utils._url_exists(STANDINGS_URL % year) and \ utils._url_exists(STANDINGS_URL % str(int(year) - 1)): year = str(int(year) - 1) doc = utils._pull_page(STANDINGS_URL % year, standings_file) div_prefix = 'div#all_expanded_standings_overall' standings = utils._get_stats_table(doc, div_prefix) doc = utils._pull_page(TEAM_STATS_URL % year, teams_file) div_prefix = 'div#all_teams_standard_%s' batting_stats = utils._get_stats_table(doc, div_prefix % 'batting') pitching_stats = utils._get_stats_table(doc, div_prefix % 'pitching') if not standings and not batting_stats and not pitching_stats: utils._no_data_found() return None, None for stats_list in [standings, batting_stats, pitching_stats]: team_data_dict = _add_stats_data(stats_list, team_data_dict) return team_data_dict, year
def _pull_schedule(self, abbreviation, year): """ Download and create objects for the team's schedule. Given a team abbreviation and season, first download the team's schedule page and convert to a PyQuery object, then create a Game instance for every game in the team's schedule and append it to the '_games' property. Parameters ---------- abbreviation : string A team's short name, such as 'DET' for the Detroit Pistons. year : string The requested year to pull stats from. """ if not year: year = utils._find_year_for_season('nba') # Given the delays to the NBA season in 2020, the default season # selection logic is no longer valid after the original season # should have concluded. In this case, the previous season should # be pulled instead. if year == 2021: try: doc = pq(SCHEDULE_URL % (abbreviation.lower(), year)) except HTTPError: year = str(int(year) - 1) # If stats for the requested season do not exist yet (as is the # case right before a new season begins), attempt to pull the # previous year's stats. If it exists, use the previous year # instead. if not utils._url_exists(SCHEDULE_URL % (abbreviation.lower(), year)) and \ utils._url_exists(SCHEDULE_URL % (abbreviation.lower(), str(int(year) - 1))): year = str(int(year) - 1) doc = pq(SCHEDULE_URL % (abbreviation, year)) schedule = utils._get_stats_table(doc, 'table#games') if not schedule: utils._no_data_found() return self._add_games_to_schedule(schedule) if 'id="games_playoffs"' in str(doc): playoffs = utils._get_stats_table(doc, 'table#games_playoffs') self._add_games_to_schedule(playoffs, True)
def _retrieve_all_teams(year): """ Find and create Team instances for all teams in the given season. For a given season, parses the specified NFL stats table and finds all requested stats. Each team then has a Team instance created which includes all requested stats and a few identifiers, such as the team's name and abbreviation. All of the individual Team instances are added to a list. Note that this method is called directly once Teams is invoked and does not need to be called manually. Parameters ---------- year : string The requested year to pull stats from. Returns ------- tuple Returns a ``tuple`` of the team_data_dict and year which represent all stats for all teams, and the given year that should be used to pull stats from, respectively. """ team_data_dict = {} if not year: year = utils._find_year_for_season('nfl') # If stats for the requested season do not exist yet (as is the case # right before a new season begins), attempt to pull the previous # year's stats. If it exists, use the previous year instead. if not utils._url_exists(SEASON_PAGE_URL % year) and \ utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)): year = str(int(year) - 1) doc = pq(SEASON_PAGE_URL % year) teams_list = utils._get_stats_table(doc, 'div#all_team_stats') afc_list = utils._get_stats_table(doc, 'table#AFC') nfc_list = utils._get_stats_table(doc, 'table#NFC') if not teams_list and not afc_list and not nfc_list: utils._no_data_found() return None, None for stats_list in [teams_list, afc_list, nfc_list]: team_data_dict = _add_stats_data(stats_list, team_data_dict) return team_data_dict, year
def _retrieve_all_teams(year, season_page=None): """ Find and create Team instances for all teams in the given season. For a given season, parses the specified NHL stats table and finds all requested stats. Each team then has a Team instance created which includes all requested stats and a few identifiers, such as the team's name and abbreviation. All of the individual Team instances are added to a list. Note that this method is called directly once Teams is invoked and does not need to be called manually. Parameters ---------- year : string The requested year to pull stats from. teams_file : string (optional) Link with filename to the local season page. Returns ------- tuple Returns a ``tuple`` in the format of (teams_list, year) where the teams_list is the PyQuery data for every team in the given season, and the year is the request year for the season. """ if not year: year = utils._find_year_for_season('nhl') # If stats for the requested season do not exist yet (as is the case # right before a new season begins), attempt to pull the previous # year's stats. If it exists, use the previous year instead. if not utils._url_exists(SEASON_PAGE_URL % year) and \ utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)): year = str(int(year) - 1) doc = utils._pull_page(SEASON_PAGE_URL % year, season_page) teams_list = utils._get_stats_table(doc, 'div#all_stats') if not teams_list: utils._no_data_found() return None, None return teams_list, year
def test__find_year_for_season_returns_correct_year(self): season_start_matrix = [ # MLB Months SeasonStarts('mlb', 1, 2017), SeasonStarts('mlb', 2, 2017), SeasonStarts('mlb', 3, 2018), SeasonStarts('mlb', 4, 2018), SeasonStarts('mlb', 5, 2018), SeasonStarts('mlb', 6, 2018), SeasonStarts('mlb', 7, 2018), SeasonStarts('mlb', 8, 2018), SeasonStarts('mlb', 9, 2018), SeasonStarts('mlb', 10, 2018), SeasonStarts('mlb', 11, 2018), SeasonStarts('mlb', 12, 2018), # NBA Months SeasonStarts('nba', 1, 2018), SeasonStarts('nba', 2, 2018), SeasonStarts('nba', 3, 2018), SeasonStarts('nba', 4, 2018), SeasonStarts('nba', 5, 2018), SeasonStarts('nba', 6, 2018), SeasonStarts('nba', 7, 2018), SeasonStarts('nba', 8, 2018), SeasonStarts('nba', 9, 2019), SeasonStarts('nba', 10, 2019), SeasonStarts('nba', 11, 2019), SeasonStarts('nba', 12, 2019), # NCAAB Months SeasonStarts('ncaab', 1, 2018), SeasonStarts('ncaab', 2, 2018), SeasonStarts('ncaab', 3, 2018), SeasonStarts('ncaab', 4, 2018), SeasonStarts('ncaab', 5, 2018), SeasonStarts('ncaab', 6, 2018), SeasonStarts('ncaab', 7, 2018), SeasonStarts('ncaab', 8, 2018), SeasonStarts('ncaab', 9, 2018), SeasonStarts('ncaab', 10, 2019), SeasonStarts('ncaab', 11, 2019), SeasonStarts('ncaab', 12, 2019), # NCAAF Months SeasonStarts('ncaaf', 1, 2017), SeasonStarts('ncaaf', 2, 2017), SeasonStarts('ncaaf', 3, 2017), SeasonStarts('ncaaf', 4, 2017), SeasonStarts('ncaaf', 5, 2017), SeasonStarts('ncaaf', 6, 2017), SeasonStarts('ncaaf', 7, 2018), SeasonStarts('ncaaf', 8, 2018), SeasonStarts('ncaaf', 9, 2018), SeasonStarts('ncaaf', 10, 2018), SeasonStarts('ncaaf', 11, 2018), SeasonStarts('ncaaf', 12, 2018), # NFL Months SeasonStarts('nfl', 1, 2017), SeasonStarts('nfl', 2, 2017), SeasonStarts('nfl', 3, 2017), SeasonStarts('nfl', 4, 2017), SeasonStarts('nfl', 5, 2017), SeasonStarts('nfl', 6, 2017), SeasonStarts('nfl', 7, 2017), SeasonStarts('nfl', 8, 2018), SeasonStarts('nfl', 9, 2018), SeasonStarts('nfl', 10, 2018), SeasonStarts('nfl', 11, 2018), SeasonStarts('nfl', 12, 2018), # NHL Months SeasonStarts('nhl', 1, 2018), SeasonStarts('nhl', 2, 2018), SeasonStarts('nhl', 3, 2018), SeasonStarts('nhl', 4, 2018), SeasonStarts('nhl', 5, 2018), SeasonStarts('nhl', 6, 2018), SeasonStarts('nhl', 7, 2018), SeasonStarts('nhl', 8, 2018), SeasonStarts('nhl', 9, 2019), SeasonStarts('nhl', 10, 2019), SeasonStarts('nhl', 11, 2019), SeasonStarts('nhl', 12, 2019) ] for month in season_start_matrix: mock_datetime = MockDateTime(month.month, 2018) flexmock(utils) \ .should_receive('_todays_date')\ .and_return(mock_datetime) result = utils._find_year_for_season(month.league) assert result == month.expected_year