Example #1
0
    def _pull_schedule(self, abbreviation, year):
        """
        Fetch a team's gamelog page and register every game it lists.

        The schedule page for the lower-cased abbreviation and the season is
        loaded into a PyQuery document. The rows of the regular season gamelog
        table are handed to ``_add_games_to_schedule``; when the page source
        also mentions a playoff gamelog for the same season, those rows are
        handed over as post-season games.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'NWE' for the New England Patriots.
        year : string
            The requested season. A falsy value selects whatever
            ``utils._find_year_for_season('nfl')`` returns.
        """
        season = year if year else utils._find_year_for_season('nfl')
        page = pq(SCHEDULE_URL % (abbreviation.lower(), season))

        regular_rows = utils._get_stats_table(page, 'table#gamelog%s' % season)
        self._add_games_to_schedule(regular_rows, REGULAR_SEASON, season)

        # Only look for the playoff table when the page source mentions it.
        playoff_id = 'playoff_gamelog%s' % season
        if playoff_id not in str(page):
            return
        playoff_rows = utils._get_stats_table(page, 'table#%s' % playoff_id)
        self._add_games_to_schedule(playoff_rows, POST_SEASON, season)
Example #2
0
    def _pull_schedule(self, abbreviation, year):
        """
        Fetch a team's gamelog page and register every game it lists.

        When no season is given, the default season is looked up and, if its
        schedule page cannot be reached while the previous season's page can,
        the previous season is used instead. The regular season gamelog rows
        are handed to ``_add_games_to_schedule``, followed by the playoff
        gamelog rows when the page source mentions a playoff table.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'NWE' for the New England Patriots.
        year : string
            The requested season. A falsy value triggers the default season
            lookup described above.
        """
        if not year:
            year = utils._find_year_for_season('nfl')
            team = abbreviation.lower()
            # Right before a new season begins its page may not exist yet.
            # Fall back to the previous season, but only when that page is
            # actually reachable.
            if not utils._url_exists(SCHEDULE_URL % (team, year)):
                previous = str(int(year) - 1)
                if utils._url_exists(SCHEDULE_URL % (team, previous)):
                    year = previous
        page = pq(SCHEDULE_URL % (abbreviation.lower(), year))

        regular_rows = utils._get_stats_table(page, 'table#gamelog%s' % year)
        self._add_games_to_schedule(regular_rows, REGULAR_SEASON, year)

        # Only look for the playoff table when the page source mentions it.
        playoff_id = 'playoff_gamelog%s' % year
        if playoff_id not in str(page):
            return
        playoff_rows = utils._get_stats_table(page, 'table#%s' % playoff_id)
        self._add_games_to_schedule(playoff_rows, POST_SEASON, year)
Example #3
0
    def _pull_schedule(self, abbreviation, year):
        """
        Fetch a team's game log page and register every game it lists.

        The schedule page for the abbreviation and season is loaded into a
        PyQuery document. The rows of the 'tgl_basic' table are handed to
        ``_add_games_to_schedule``; when the page source mentions
        'tgl_basic_playoffs', the playoff rows are handed over as well.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'DET' for the Detroit Pistons.
        year : string
            The requested season. A falsy value selects whatever
            ``utils._find_year_for_season('nba')`` returns.
        """
        season = year or utils._find_year_for_season('nba')
        page = pq(SCHEDULE_URL % (abbreviation, season))

        self._add_games_to_schedule(
            utils._get_stats_table(page, 'table#tgl_basic'))

        # Only look for the playoff table when the page source mentions it.
        if 'tgl_basic_playoffs' not in str(page):
            return
        self._add_games_to_schedule(
            utils._get_stats_table(page, 'div#all_tgl_basic_playoffs'))
Example #4
0
def _retrieve_all_teams(year, basic_stats=None, basic_opp_stats=None,
                        adv_stats=None, adv_opp_stats=None):
    """
    Gather the NCAAB team stats tables for a season into one dictionary.

    Four pages are pulled in order (basic, basic opponent, advanced and
    advanced opponent stats) and the stats table of each is extracted. The
    rows of every table are then folded into a single dictionary through
    ``_add_stats_data``.

    When no season is given, the default season is looked up and, if its
    basic stats page cannot be reached while the previous season's page can,
    the previous season is used instead.

    Parameters
    ----------
    year : string
        The requested year to pull stats from.
    basic_stats : string (optional)
        Link with filename to the local basic stats page.
    basic_opp_stats : string (optional)
        Link with filename to the local basic opponent stats page.
    adv_stats : string (optional)
        Link with filename to the local advanced stats page.
    adv_opp_stats : string (optional)
        Link with filename to the local advanced opponent stats page.

    Returns
    -------
    tuple
        ``(team_data_dict, year)``: the accumulated stats and the season
        that was actually used. ``(None, None)`` is returned when none of
        the four tables could be found.
    """
    if not year:
        year = utils._find_year_for_season('ncaab')
        # Right before a new season begins its page may not exist yet. Fall
        # back to the previous season, but only when that page is reachable.
        if not utils._url_exists(BASIC_STATS_URL % year):
            previous = str(int(year) - 1)
            if utils._url_exists(BASIC_STATS_URL % previous):
                year = previous

    # (URL template, optional local file, table selector) for each page,
    # listed in the order the pages are requested.
    sources = (
        (BASIC_STATS_URL, basic_stats, 'table#basic_school_stats'),
        (BASIC_OPPONENT_STATS_URL, basic_opp_stats, 'table#basic_opp_stats'),
        (ADVANCED_STATS_URL, adv_stats, 'table#adv_school_stats'),
        (ADVANCED_OPPONENT_STATS_URL, adv_opp_stats, 'table#adv_opp_stats'),
    )
    tables = []
    for url_template, local_file, selector in sources:
        page = utils._pull_page(url_template % year, local_file)
        tables.append(utils._get_stats_table(page, selector))

    if not any(tables):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for table in tables:
        team_data_dict = _add_stats_data(table, team_data_dict)
    return team_data_dict, year
Example #5
0
def _retrieve_all_teams(year, season_file=None):
    """
    Gather the NBA team and opponent stats tables for a season.

    The season page is pulled and both the team and the opponent base stats
    tables are extracted from it. The rows of each table are folded into a
    single dictionary through ``_add_stats_data``.

    When no season is given, the default season is looked up and adjusted as
    described in the inline comments below.

    Parameters
    ----------
    year : string
        The requested year to pull stats from.
    season_file : string (optional)
        Link with filename to the local season page.

    Returns
    -------
    tuple
        ``(team_data_dict, year)``: the accumulated stats and the season
        that was actually used. ``(None, None)`` is returned when neither
        table could be found.
    """
    if not year:
        year = utils._find_year_for_season('nba')
        # The 2020 NBA season was delayed, so the default season selection
        # is not valid once the original season should have concluded. If
        # the 2021 page cannot be fetched, use the previous season instead.
        if year == 2021:
            try:
                # Probe only: the parsed document is not kept.
                pq(SEASON_PAGE_URL % year)
            except HTTPError:
                year = str(int(year) - 1)
        # Right before a new season begins its page may not exist yet. Fall
        # back to the previous season, but only when that page is reachable.
        if not utils._url_exists(SEASON_PAGE_URL % year):
            previous = str(int(year) - 1)
            if utils._url_exists(SEASON_PAGE_URL % previous):
                year = previous

    page = utils._pull_page(SEASON_PAGE_URL % year, season_file)
    team_rows = utils._get_stats_table(page, 'div#all_team-stats-base')
    opponent_rows = utils._get_stats_table(page, 'div#all_opponent-stats-base')

    if not (team_rows or opponent_rows):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for rows in (team_rows, opponent_rows):
        team_data_dict = _add_stats_data(rows, team_data_dict)
    return team_data_dict, year
Example #6
0
def _retrieve_all_teams(year, season_page=None, offensive_stats=None,
                        defensive_stats=None):
    """
    Gather the NCAAF standings, offense and defense tables for a season.

    The season, offensive stats and defensive stats pages are pulled in that
    order and one table is extracted from each. The rows of every table are
    then folded into a single dictionary through ``_add_stats_data``.

    When no season is given, the default season is looked up and, if its
    season page cannot be reached while the previous season's page can, the
    previous season is used instead.

    Note that this method is called directly once Teams is invoked and does
    not need to be called manually.

    Parameters
    ----------
    year : string
        The requested year to pull stats from.
    season_page : string (optional)
        Link with filename to the local season stats page.
    offensive_stats : string (optional)
        Link with filename to the local offensive stats page.
    defensive_stats : string (optional)
        Link with filename to the local defensive stats page.

    Returns
    -------
    tuple
        ``(team_data_dict, year)``: the accumulated stats and the season
        that was actually used. ``(None, None)`` is returned when none of
        the three tables could be found.
    """
    if not year:
        year = utils._find_year_for_season('ncaaf')
        # If stats for the requested season do not exist yet (as is the case
        # right before a new season begins), attempt to pull the previous
        # year's stats. If it exists, use the previous year instead.
        if not utils._url_exists(SEASON_PAGE_URL % year) and \
           utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)):
            year = str(int(year) - 1)
    doc = utils._pull_page(SEASON_PAGE_URL % year, season_page)
    teams_list = utils._get_stats_table(doc, 'div#div_standings')
    offense_doc = utils._pull_page(OFFENSIVE_STATS_URL % year, offensive_stats)
    offense_list = utils._get_stats_table(offense_doc, 'table#offense')
    defense_doc = utils._pull_page(DEFENSIVE_STATS_URL % year, defensive_stats)
    defense_list = utils._get_stats_table(defense_doc, 'table#defense')
    if not teams_list and not offense_list and not defense_list:
        utils._no_data_found()
        # Stop here rather than passing the missing tables on to
        # _add_stats_data.
        return None, None
    team_data_dict = {}
    for stats_list in [teams_list, offense_list, defense_list]:
        team_data_dict = _add_stats_data(stats_list, team_data_dict)
    return team_data_dict, year
Example #7
0
def _retrieve_all_teams(year, standings_file=None, teams_file=None):
    """
    Gather the MLB standings, batting and pitching tables for a season.

    The standings page and the team stats page are pulled in that order. The
    expanded standings table is taken from the first page, the standard
    batting and pitching tables from the second. The rows of every table are
    then folded into a single dictionary through ``_add_stats_data``.

    When no season is given, the default season is looked up and, if its
    standings page cannot be reached while the previous season's page can,
    the previous season is used instead.

    Parameters
    ----------
    year : string
        The requested year to pull stats from.
    standings_file : string (optional)
        Link with filename to the local standings page.
    teams_file : string (optional)
        Link with filename to the local teams page.

    Returns
    -------
    tuple
        ``(team_data_dict, year)``: the accumulated stats and the season
        that was actually used. ``(None, None)`` is returned when none of
        the three tables could be found.
    """
    if not year:
        year = utils._find_year_for_season('mlb')
        # Right before a new season begins its page may not exist yet. Fall
        # back to the previous season, but only when that page is reachable.
        if not utils._url_exists(STANDINGS_URL % year):
            previous = str(int(year) - 1)
            if utils._url_exists(STANDINGS_URL % previous):
                year = previous

    standings_page = utils._pull_page(STANDINGS_URL % year, standings_file)
    standings = utils._get_stats_table(standings_page,
                                       'div#all_expanded_standings_overall')

    teams_page = utils._pull_page(TEAM_STATS_URL % year, teams_file)
    batting_stats, pitching_stats = [
        utils._get_stats_table(teams_page, 'div#all_teams_standard_%s' % kind)
        for kind in ('batting', 'pitching')
    ]

    if not (standings or batting_stats or pitching_stats):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for table in (standings, batting_stats, pitching_stats):
        team_data_dict = _add_stats_data(table, team_data_dict)
    return team_data_dict, year
Example #8
0
    def _pull_schedule(self, abbreviation, year):
        """
        Fetch a team's schedule page and store a Game for each game row.

        When no season is given, the default season is looked up and, if its
        schedule page cannot be reached while the previous season's page can,
        the previous season is used instead. Every row of the schedule table
        that is not a repeated header row is wrapped in a Game instance and
        appended to the '_games' property.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'PURDUE' for the Purdue Boilermakers.
        year : string
            The requested season. A falsy value triggers the default season
            lookup described above.
        """
        if not year:
            year = utils._find_year_for_season('ncaab')
            team = abbreviation.lower()
            # Right before a new season begins its page may not exist yet.
            # Fall back to the previous season, but only when that page is
            # actually reachable.
            if not utils._url_exists(SCHEDULE_URL % (team, year)):
                previous = str(int(year) - 1)
                if utils._url_exists(SCHEDULE_URL % (team, previous)):
                    year = previous
        page = pq(SCHEDULE_URL % (abbreviation.lower(), year))

        for row in utils._get_stats_table(page, 'table#schedule'):
            # Rows marked with the 'thead' class are skipped.
            if 'class="thead"' not in str(row):
                self._games.append(Game(row))
Example #9
0
    def _pull_schedule(self, abbreviation, year):
        """
        Fetch a team's game log page and store a Game for each game row.

        The schedule page for the abbreviation and season is loaded into a
        PyQuery document. Every row of the regular season game log table that
        is not a repeated header row is wrapped in a Game instance, built
        from the row and the season, and appended to the '_games' property.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'NYR' for the New York Rangers.
        year : string
            The requested season. A falsy value selects whatever
            ``utils._find_year_for_season('nhl')`` returns.
        """
        if not year:
            year = utils._find_year_for_season('nhl')
        page = pq(SCHEDULE_URL % (abbreviation, year))

        for row in utils._get_stats_table(page, 'table#tm_gamelog_rs'):
            # Rows marked with the 'thead' class are skipped.
            if 'class="thead"' not in str(row):
                self._games.append(Game(row, year))
Example #10
0
    def test__get_stats_table_returns_correct_table(self):
        html_string = '''<div>
    <table class="stats_table" id="all_stats">
        <tbody>
            <tr data-row="0">
                <td class="right " data-stat="column1">1</td>
            </tr>
            <tr data-row="1">
                <td class="right " data-stat="column2">2</td>
            </tr>
        </tbody>
    </table>
</div>'''
        expected = ['<tr data-row="0">\n<td class="right " '
                    'data-stat="column1">1</td>\n</tr>',
                    '<tr data-row="1">\n<td class="right " '
                    'data-stat="column2">2</td>\n</tr>']
        div = 'table#all_stats'
        flexmock(utils) \
            .should_receive('_remove_html_comment_tags') \
            .and_return(html_string) \
            .once()

        result = utils._get_stats_table(MockHtml(html_string, expected), div)

        i = 0
        for element in result:
            i += 1

        assert i == 2
Example #11
0
    def _pull_schedule(self, team_id, doc):
        """
        Parse the squad page's match log and register every game it lists.

        If no document is supplied, the squad ID is looked up from the team
        ID and the squad page is downloaded; an HTTP error during that
        download ends the method without adding any games. The rows of the
        'matchlogs_all' table are then handed to ``_add_games_to_schedule``.
        When the table cannot be found, ``utils._no_data_found`` is called
        and nothing is added.

        Parameters
        ----------
        team_id : string
            The team's 8-digit squad ID or the team's name, such as 'Tottenham
            Hotspur'.
        doc : PyQuery object
            An already downloaded squad page. When given, it is used instead
            of making another request to the website. When it is falsy (for
            example None), the page is downloaded.
        """
        if not doc:
            squad_id = _lookup_team(team_id)
            try:
                doc = pq(SQUAD_URL % squad_id)
            except HTTPError:
                return

        match_rows = utils._get_stats_table(doc, 'table#matchlogs_all')
        if match_rows:
            self._add_games_to_schedule(match_rows)
        else:
            utils._no_data_found()
Example #12
0
    def _pull_schedule(self, abbreviation, year):
        """
        Fetch a team's schedule page and store a Game for each table row.

        The schedule page for the lower-cased abbreviation and season is
        loaded into a PyQuery document. Every row of the schedule table is
        wrapped in a Game instance and appended to the '_games' property.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'MICHIGAN' for the Michigan
            Wolverines.
        year : string
            The requested season. A falsy value selects whatever
            ``utils._find_year_for_season('ncaaf')`` returns.
        """
        season = year or utils._find_year_for_season('ncaaf')
        page = pq(SCHEDULE_URL % (abbreviation.lower(), season))

        for row in utils._get_stats_table(page, 'table#schedule'):
            self._games.append(Game(row))
Example #13
0
    def _pull_schedule(self, team_id, doc):
        """
        Parse the squad page's schedule and register every game it lists.

        If no document is supplied, the squad ID is looked up from the team
        ID and the squad page is downloaded. The schedule table is searched
        for under two table IDs in turn, and the rows of the first one found
        are handed to ``_add_games_to_schedule``. When neither table can be
        found, ``utils._no_data_found`` is called and nothing is added.

        Parameters
        ----------
        team_id : string
            The team's 8-digit squad ID or the team's name, such as 'Tottenham
            Hotspur'.
        doc : PyQuery object
            An already downloaded squad page. When given, it is used instead
            of making another request to the website. When it is falsy (for
            example None), the page is downloaded.
        """
        if not doc:
            doc = pq(SQUAD_URL % _lookup_team(team_id))

        # Most leagues use the 'ks_sched_all' tag for competitions, but some,
        # like the MLS in North America, use a different table ID. The second
        # lookup only runs when the first one comes back empty.
        schedule = (utils._get_stats_table(doc, 'table#ks_sched_all')
                    or utils._get_stats_table(doc, 'table#ks_sched_10090'))

        if schedule:
            self._add_games_to_schedule(schedule)
        else:
            utils._no_data_found()
Example #14
0
    def _pull_schedule(self, abbreviation, year):
        """
        Fetch a team's games page and register every game it lists.

        When no season is given, the default season is looked up and adjusted
        as described in the inline comments below. The rows of the 'games'
        table are handed to ``_add_games_to_schedule``; when the page source
        also contains a 'games_playoffs' table, its rows are handed over with
        an extra ``True`` argument. When the 'games' table cannot be found,
        ``utils._no_data_found`` is called and nothing is added.

        Parameters
        ----------
        abbreviation : string
            A team's short name, such as 'DET' for the Detroit Pistons.
        year : string
            The requested season. A falsy value triggers the default season
            lookup described above.
        """
        if not year:
            year = utils._find_year_for_season('nba')
            team = abbreviation.lower()
            # The 2020 NBA season was delayed, so the default season
            # selection is not valid once the original season should have
            # concluded. If the 2021 page cannot be fetched, use the previous
            # season instead.
            if year == 2021:
                try:
                    # Probe only: the parsed document is not kept.
                    pq(SCHEDULE_URL % (team, year))
                except HTTPError:
                    year = str(int(year) - 1)
            # Right before a new season begins its page may not exist yet.
            # Fall back to the previous season, but only when that page is
            # actually reachable.
            if not utils._url_exists(SCHEDULE_URL % (team, year)):
                previous = str(int(year) - 1)
                if utils._url_exists(SCHEDULE_URL % (team, previous)):
                    year = previous
        page = pq(SCHEDULE_URL % (abbreviation, year))

        regular_rows = utils._get_stats_table(page, 'table#games')
        if not regular_rows:
            utils._no_data_found()
            return
        self._add_games_to_schedule(regular_rows)

        # Only look for the playoff table when the page source contains it.
        if 'id="games_playoffs"' not in str(page):
            return
        playoff_rows = utils._get_stats_table(page, 'table#games_playoffs')
        self._add_games_to_schedule(playoff_rows, True)
Example #15
0
def _retrieve_all_teams(year):
    """
    Gather the NFL team stats and conference tables for a season.

    The season page is loaded into a PyQuery document and three tables are
    extracted from it: the team stats table and the AFC and NFC tables. The
    rows of every table are then folded into a single dictionary through
    ``_add_stats_data``.

    When no season is given, the default season is looked up and, if its
    season page cannot be reached while the previous season's page can, the
    previous season is used instead.

    Note that this method is called directly once Teams is invoked and does
    not need to be called manually.

    Parameters
    ----------
    year : string
        The requested year to pull stats from.

    Returns
    -------
    tuple
        ``(team_data_dict, year)``: the accumulated stats and the season
        that was actually used. ``(None, None)`` is returned when none of
        the three tables could be found.
    """
    if not year:
        year = utils._find_year_for_season('nfl')
        # Right before a new season begins its page may not exist yet. Fall
        # back to the previous season, but only when that page is reachable.
        if not utils._url_exists(SEASON_PAGE_URL % year):
            previous = str(int(year) - 1)
            if utils._url_exists(SEASON_PAGE_URL % previous):
                year = previous

    page = pq(SEASON_PAGE_URL % year)
    tables = [utils._get_stats_table(page, selector)
              for selector in ('div#all_team_stats', 'table#AFC', 'table#NFC')]

    if not any(tables):
        utils._no_data_found()
        return None, None

    team_data_dict = {}
    for table in tables:
        team_data_dict = _add_stats_data(table, team_data_dict)
    return team_data_dict, year
Example #16
0
def _retrieve_all_teams(year, season_page=None):
    """
    Pull the NHL season page and extract its team stats table.

    When no season is given, the default season is looked up and, if its
    season page cannot be reached while the previous season's page can, the
    previous season is used instead.

    Note that this method is called directly once Teams is invoked and does not
    need to be called manually.

    Parameters
    ----------
    year : string
        The requested year to pull stats from.
    season_page : string (optional)
        Link with filename to the local season page.

    Returns
    -------
    tuple
        ``(teams_list, year)``: the value returned by
        ``utils._get_stats_table`` for the 'all_stats' div, and the season
        that was actually used. ``(None, None)`` is returned when the table
        could not be found.
    """
    if not year:
        year = utils._find_year_for_season('nhl')
        # Right before a new season begins its page may not exist yet. Fall
        # back to the previous season, but only when that page is reachable.
        if not utils._url_exists(SEASON_PAGE_URL % year):
            previous = str(int(year) - 1)
            if utils._url_exists(SEASON_PAGE_URL % previous):
                year = previous

    page = utils._pull_page(SEASON_PAGE_URL % year, season_page)
    teams_list = utils._get_stats_table(page, 'div#all_stats')
    if teams_list:
        return teams_list, year
    utils._no_data_found()
    return None, None