예제 #1
0
def _add_stats_data(teams_list, team_data_dict):
    """
    Add a team's stats row to a dictionary.

    Pass table contents and a stats dictionary of all teams to accumulate
    all stats for each team in a single variable. A team's rank is taken
    from its position among the non-header rows and is only recorded the
    first time the team is added to the dictionary; later rows for the same
    team are appended to its existing 'data' entry.

    Parameters
    ----------
    teams_list : generator
        A generator of all row items in a given table. If it is None or
        otherwise empty, the dictionary is returned unchanged.
    team_data_dict : {str: {'data': str, 'rank': int}} dictionary
        A dictionary where every key is the team's abbreviation and every
        value is another dictionary with a 'data' key which contains the
        string version of the row data for the matched team, and a 'rank'
        key which is the rank of the team.

    Returns
    -------
    dictionary
        An updated version of the team_data_dict with the passed table row
        information included. The same dictionary object that was passed in
        is updated in place and returned.
    """
    # A missing table leaves nothing to iterate over, so hand the dictionary
    # back untouched instead of raising a TypeError on `None`.
    if not teams_list:
        return team_data_dict
    # Teams are listed in terms of rank with the first team being #1
    rank = 1
    for team_data in teams_list:
        # Skip the header rows, which do not describe a team and must not
        # consume a rank.
        if 'class="thead onecell"' in str(team_data):
            continue
        abbr = utils._parse_field(PARSING_SCHEME, team_data, 'abbreviation')
        try:
            team_data_dict[abbr]['data'] += team_data
        except KeyError:
            # First time this team is seen: create its entry and pin its rank.
            team_data_dict[abbr] = {'data': team_data, 'rank': rank}
        rank += 1
    return team_data_dict
예제 #2
0
    def _parse_game_data(self, game_data):
        """
        Populate the instance's attributes from a game's HTML data.

        Every attribute currently stored on the instance is visited. The
        'datetime' and 'opponent_rank' attributes are left untouched by this
        method, while 'opponent_abbr' and 'boxscore' are handed off to their
        dedicated parsing methods. Every other attribute is looked up in the
        schedule parsing scheme by its name (without the leading underscore)
        and overwritten with the parsed result.

        Note that this method is called directly once Game is invoked and does
        not need to be called manually.

        Parameters
        ----------
        game_data : string
            A string containing all of the rows of stats for a given game.
        """
        for attribute in self.__dict__:
            # Attributes are stored with a leading '_' which is not part of
            # the name used by the parsing scheme.
            name = str(attribute)[1:]
            if name in ('datetime', 'opponent_rank'):
                continue
            if name == 'opponent_abbr':
                self._parse_abbreviation(game_data)
            elif name == 'boxscore':
                self._parse_boxscore(game_data)
            else:
                parsed = utils._parse_field(SCHEDULE_SCHEME, game_data, name)
                setattr(self, attribute, parsed)
예제 #3
0
def _add_stats_data(teams_list, team_data_dict):
    """
    Merge the rows of a stats table into the per-team dictionary.

    Each non-header row is matched to a team through its abbreviation. The
    row is appended to that team's existing 'data' entry, or a new entry is
    created when the team has not been seen before, so that all stats for a
    team end up accumulated in a single variable.

    Parameters
    ----------
    teams_list : generator
        A generator of all row items in a given table. A falsy value causes
        the dictionary to be returned unchanged.
    team_data_dict : {str: {'data': str}} dictionary
        A dictionary where every key is the team's abbreviation and every value
        is another dictionary with a 'data' key which contains the string
        version of the row data for the matched team.

    Returns
    -------
    dictionary
        An updated version of the team_data_dict with the passed table row
        information included.
    """
    if not teams_list:
        return team_data_dict
    # Markup fragments that identify the sub-header rows of a table.
    header_markers = ('class="over_header thead"', 'class="thead"')
    for row in teams_list:
        markup = str(row)
        if any(marker in markup for marker in header_markers):
            continue
        team_key = utils._parse_field(PARSING_SCHEME, row, 'abbreviation')
        try:
            entry = team_data_dict[team_key]
            entry['data'] += row
        except KeyError:
            # No usable entry for this team yet, so start one with this row.
            team_data_dict[team_key] = {'data': row}
    return team_data_dict
예제 #4
0
    def _parse_game_data(self, uri):
        """
        Populate the instance's attributes from a boxscore page.

        The HTML page for the requested boxscore is retrieved first. Every
        attribute currently stored on the instance is then visited and, unless
        it is one of the attributes this method leaves alone, overwritten with
        the value parsed from the page. Game information and team names are
        parsed by their dedicated methods; everything else goes through the
        boxscore parsing scheme using the attribute's configured index, which
        defaults to 0.

        Note that this method is called directly once Boxscore is invoked and
        does not need to be called manually.

        Parameters
        ----------
        uri : string
            The relative link to the boxscore HTML page, such as
            'BOS/BOS201806070'.
        """
        boxscore = self._retrieve_html_page(uri)
        # Without a page, the game likely hasn't been played yet and there is
        # nothing to parse, so the instance is simply left empty.
        if not boxscore:
            return

        # Attributes that are not assigned by this method.
        skipped = ('winner', 'winning_name', 'winning_abbr', 'losing_name',
                   'losing_abbr', 'uri')
        # Attributes handled by the date-and-location parser.
        game_info = ('date', 'time', 'venue', 'attendance', 'time_of_day',
                     'duration')
        # Attributes handled by the team name parser.
        team_names = ('away_name', 'home_name')

        for attribute in self.__dict__:
            # Drop the leading '_' to get the name used by the parsers.
            name = str(attribute)[1:]
            if name in skipped:
                continue
            if name in game_info:
                parsed = self._parse_game_date_and_location(name, boxscore)
            elif name in team_names:
                parsed = self._parse_name(name, boxscore)
            else:
                position = BOXSCORE_ELEMENT_INDEX.get(name, 0)
                parsed = utils._parse_field(BOXSCORE_SCHEME, boxscore, name,
                                            position)
            setattr(self, attribute, parsed)
예제 #5
0
    def test__parse_field_returns_value_for_non_abbreviation(self):
        """Check the mocked value is returned for a non-abbreviation field."""
        expected = '32'
        scheme = {'batters_used': 'td[data-stat="batters_used"]:first'}
        markup = (
            '<td class="right " data-stat="batters_used">32</td>\n'
            '<td class="right " data-stat="age_bat">29.1</td>\n'
            '<td class="right " data-stat="runs_per_game">4.10</td>'
        )
        mock_html = MockHtml(markup, [expected])

        result = utils._parse_field(scheme, mock_html, 'batters_used')

        assert result == expected
예제 #6
0
    def test_parse_field_returns_none_on_index_error(self):
        """Check that None comes back when index 3 is requested."""
        expected = None
        scheme = {'batters_used': 'td[data-stat="batters_used"]:first'}
        markup = (
            '<td class="right " data-stat="batters_used">32</td>\n'
            '<td class="right " data-stat="age_bat">29.1</td>\n'
            '<td class="right " data-stat="runs_per_game">4.10</td>'
        )
        # Only a single item is available, so index 3 is out of range.
        mock_html = MockHtml(markup, [expected])

        result = utils._parse_field(scheme, mock_html, 'batters_used', index=3)

        assert result == expected
예제 #7
0
    def test__parse_field_returns_abbreviation(self):
        """Check the 'abbreviation' field uses utils._parse_abbreviation."""
        expected = 'ARI'
        scheme = {'abbreviation': 'a'}
        team_link = '/teams/ARI/2018.shtml'
        # The abbreviation parser is stubbed and must be called exactly once.
        (flexmock(utils)
            .should_receive('_parse_abbreviation')
            .and_return('ARI')
            .once())
        mock_html = MockHtml(team_link, None)

        result = utils._parse_field(scheme, mock_html, 'abbreviation')

        assert result == expected
예제 #8
0
    def test_secondary_index_pulling_values_bad_secondary(self):
        """Check the result is falsy when both indices exceed the items."""
        scheme = {'batters_used': 'td[data-stat="batters_used"]'}
        markup = (
            '<td class="right " data-stat="batters_used">32</td>\n'
            '<td class="right " data-stat="age_bat">29.1</td>\n'
            '<td class="right " data-stat="runs_per_game">4.10</td>\n'
            '<td class="right " data-stat="batters_used">31</td>\n'
            '<td class="right " data-stat="batters_used">34</td>'
        )
        # Three items are available, so neither index 3 nor 4 exists.
        values = [32, 31, 34]
        mock_html = MockHtml(markup, values)

        result = utils._parse_field(scheme, mock_html, 'batters_used',
                                    index=3, secondary_index=4)

        assert not result
예제 #9
0
    def _parse_game_data(self, uri):
        """
        Parses a value for every attribute.

        This function retrieves the boxscore HTML page for the given URI, then
        looks through every attribute and retrieves the value according to the
        parsing scheme and index of the attribute from that page. Once the
        value is retrieved, the attribute's value is updated with the returned
        result. If no page could be retrieved, the method returns early and no
        attribute is modified.

        Note that this method is called directly once Boxscore is invoked and
        does not need to be called manually.

        Parameters
        ----------
        uri : string
            The relative link to the boxscore HTML page, such as
            '201802040nwe'.
        """
        boxscore = self._retrieve_html_page(uri)
        # If the boxscore is None, the game likely hasn't been played yet and
        # no information can be gathered. As there is nothing to grab, the
        # class instance should just be empty.
        if not boxscore:
            return

        # Fields which are stored as a list holding the text of every element
        # matched by the field's selector, instead of a single parsed value.
        fields_to_special_parse = [
            'away_even_strength_assists', 'away_power_play_assists',
            'away_short_handed_assists', 'away_game_winning_goals',
            'away_saves', 'away_save_percentage', 'away_shutout',
            'home_even_strength_assists', 'home_power_play_assists',
            'home_short_handed_assists', 'home_game_winning_goals',
            'home_saves', 'home_save_percentage', 'home_shutout'
        ]

        for field in self.__dict__:
            # Remove the '_' from the name
            short_field = str(field)[1:]
            # These attributes are not assigned anywhere in this loop.
            if short_field == 'winner' or \
               short_field == 'winning_name' or \
               short_field == 'winning_abbr' or \
               short_field == 'losing_name' or \
               short_field == 'losing_abbr' or \
               short_field == 'uri':
                continue
            # General game information is parsed by a dedicated method.
            if short_field == 'date' or \
               short_field == 'time' or \
               short_field == 'arena' or \
               short_field == 'attendance' or \
               short_field == 'time_of_day' or \
               short_field == 'duration':
                value = self._parse_game_date_and_location(
                    short_field, boxscore)
                setattr(self, field, value)
                continue
            # Team names are parsed by a dedicated method.
            if short_field == 'away_name' or \
               short_field == 'home_name':
                value = self._parse_name(short_field, boxscore)
                setattr(self, field, value)
                continue
            if short_field in fields_to_special_parse:
                scheme = BOXSCORE_SCHEME[short_field]
                # Collect the text of every match rather than a single one.
                # NOTE(review): assumes boxscore is a PyQuery-like object that
                # is callable with a selector - confirm against
                # _retrieve_html_page.
                value = [i.text() for i in boxscore(scheme).items()]
                setattr(self, field, value)
                continue
            # Use the field's configured index when one exists, otherwise fall
            # back to the first match.
            index = 0
            if short_field in BOXSCORE_ELEMENT_INDEX.keys():
                index = BOXSCORE_ELEMENT_INDEX[short_field]
            value = utils._parse_field(BOXSCORE_SCHEME, boxscore, short_field,
                                       index)
            setattr(self, field, value)

        # Store the number of elements matched by the 'away_skaters' selector.
        self._away_skaters = len(boxscore(BOXSCORE_SCHEME['away_skaters']))
        # The matches are consumed one at a time with next() below, so the
        # order of these statements matters.
        num_away_goalies = boxscore(BOXSCORE_SCHEME['away_goalies']).items()
        # Skip the first element as it is dedicated to skaters and not goalies.
        next(num_away_goalies)
        # Count the 'tbody tr' matches within the second element.
        self._away_goalies = len(next(num_away_goalies)('tbody tr'))