def _add_stats_data(teams_list, team_data_dict):
    """
    Merge the rows of one table into the per-team stats dictionary.

    Each row is attributed to a team through its parsed abbreviation. The
    first row seen for a team creates that team's entry, while a row for a
    team that already has an entry is appended to the data already stored,
    so every team's stats accumulate in a single variable.

    Parameters
    ----------
    teams_list : generator
        A generator of all row items in a given table.
    team_data_dict : {str: {'data': str, 'rank': int}} dictionary
        Maps every team's abbreviation to a dictionary holding the row data
        collected for that team under 'data' and the team's rank under
        'rank'.

    Returns
    -------
    dictionary
        The team_data_dict that was passed in, updated in place with the
        rows of the table.
    """
    # Header rows carry no team, so they are filtered out before counting.
    stat_rows = (row for row in teams_list
                 if 'class="thead onecell"' not in str(row))
    # Teams are listed in rank order, so a row's 1-based position among the
    # remaining rows is the rank recorded for a newly seen team.
    for position, row in enumerate(stat_rows, start=1):
        team = utils._parse_field(PARSING_SCHEME, row, 'abbreviation')
        try:
            team_data_dict[team]['data'] += row
        except KeyError:
            team_data_dict[team] = {'data': row, 'rank': position}
    return team_data_dict
def _parse_game_data(self, game_data):
    """
    Populate the instance's attributes from a game's HTML data.

    Walks over every attribute stored on the instance. Apart from the
    exceptions handled below, each attribute's value is looked up with the
    schedule parsing scheme, using the attribute's name without its leading
    underscore, and the attribute is overwritten with the result.

    Note that this method is called directly once Game is invoked and does
    not need to be called manually.

    Parameters
    ----------
    game_data : string
        A string containing all of the rows of stats for a given game.
    """
    # Attributes that this method leaves untouched.
    untouched = ('datetime', 'opponent_rank')
    for attribute in vars(self):
        # Remove the leading '_' from the name
        name = str(attribute)[1:]
        if name in untouched:
            continue
        if name == 'opponent_abbr':
            # Delegated to a dedicated parser; nothing is assigned here.
            self._parse_abbreviation(game_data)
        elif name == 'boxscore':
            # Delegated to a dedicated parser; nothing is assigned here.
            self._parse_boxscore(game_data)
        else:
            parsed = utils._parse_field(SCHEDULE_SCHEME, game_data, name)
            setattr(self, attribute, parsed)
def _add_stats_data(teams_list, team_data_dict):
    """
    Merge the rows of one table into the per-team stats dictionary.

    Each row is attributed to a team through its parsed abbreviation. The
    first row seen for a team creates that team's entry, while a row for a
    team that already has an entry is appended to the data already stored,
    so every team's stats accumulate in a single variable. A falsy
    teams_list leaves the dictionary unchanged.

    Parameters
    ----------
    teams_list : generator
        A generator of all row items in a given table.
    team_data_dict : {str: {'data': str}} dictionary
        Maps every team's abbreviation to a dictionary holding the row data
        collected for that team under 'data'.

    Returns
    -------
    dictionary
        The team_data_dict that was passed in, updated in place with the
        rows of the table.
    """
    if teams_list:
        # Markers identifying the sub-header rows, which hold no team data.
        header_markers = ('class="over_header thead"', 'class="thead"')
        for row in teams_list:
            markup = str(row)
            if any(marker in markup for marker in header_markers):
                continue
            team = utils._parse_field(PARSING_SCHEME, row, 'abbreviation')
            try:
                team_data_dict[team]['data'] += row
            except KeyError:
                team_data_dict[team] = {'data': row}
    return team_data_dict
def _parse_game_data(self, uri):
    """
    Populate the instance's attributes from a boxscore page.

    Retrieves the boxscore page for the given link and then walks over every
    attribute stored on the instance. Depending on the attribute's name
    (without its leading underscore) the value comes from the date and
    location parser, the team name parser, or a generic lookup with the
    boxscore parsing scheme, and the attribute is overwritten with it.

    Note that this method is called directly once Boxscore is invoked and
    does not need to be called manually.

    Parameters
    ----------
    uri : string
        The relative link to the boxscore HTML page, such as
        'BOS/BOS201806070'.
    """
    page = self._retrieve_html_page(uri)
    # Without a page the game likely hasn't been played yet, so there is
    # nothing to grab and the instance is simply left empty.
    if not page:
        return

    # Attributes that this method does not populate.
    skipped_fields = ('winner', 'winning_name', 'winning_abbr',
                      'losing_name', 'losing_abbr', 'uri')
    # Attributes resolved by the date and location parser.
    game_info_fields = ('date', 'time', 'venue', 'attendance',
                        'time_of_day', 'duration')
    # Attributes resolved by the team name parser.
    team_name_fields = ('away_name', 'home_name')

    for attribute in vars(self):
        # Remove the '_' from the name
        name = str(attribute)[1:]
        if name in skipped_fields:
            continue
        if name in game_info_fields:
            parsed = self._parse_game_date_and_location(name, page)
        elif name in team_name_fields:
            parsed = self._parse_name(name, page)
        else:
            # Fields without a registered element index use the first match.
            position = BOXSCORE_ELEMENT_INDEX.get(name, 0)
            parsed = utils._parse_field(BOXSCORE_SCHEME, page, name,
                                        position)
        setattr(self, attribute, parsed)
def test__parse_field_returns_value_for_non_abbreviation(self):
    """
    Expect '32' back from `_parse_field` for the 'batters_used' field when
    the mocked HTML is built with that single item.
    """
    scheme = {'batters_used': 'td[data-stat="batters_used"]:first'}
    markup = (
        '<td class="right " data-stat="batters_used">32</td> '
        '<td class="right " data-stat="age_bat">29.1</td> '
        '<td class="right " data-stat="runs_per_game">4.10</td>'
    )
    expected = '32'
    html = MockHtml(markup, [expected])

    assert utils._parse_field(scheme, html, 'batters_used') == expected
def test_parse_field_returns_none_on_index_error(self):
    """
    Expect None back from `_parse_field` when index 3 is requested for the
    'batters_used' field of the mocked HTML.
    """
    scheme = {'batters_used': 'td[data-stat="batters_used"]:first'}
    markup = (
        '<td class="right " data-stat="batters_used">32</td> '
        '<td class="right " data-stat="age_bat">29.1</td> '
        '<td class="right " data-stat="runs_per_game">4.10</td>'
    )
    expected = None
    html = MockHtml(markup, [expected])

    outcome = utils._parse_field(scheme, html, 'batters_used', index=3)
    assert outcome == expected
def test__parse_field_returns_abbreviation(self):
    """
    Expect `_parse_field` to return 'ARI' for the 'abbreviation' field, with
    `utils._parse_abbreviation` stubbed and required to be called once.
    """
    scheme = {'abbreviation': 'a'}
    team_link = '/teams/ARI/2018.shtml'
    expected = 'ARI'
    # Stub the abbreviation parser and demand exactly one call to it.
    (flexmock(utils)
        .should_receive('_parse_abbreviation')
        .and_return('ARI')
        .once())
    html = MockHtml(team_link, None)

    outcome = utils._parse_field(scheme, html, 'abbreviation')
    assert outcome == expected
def test_secondary_index_pulling_values_bad_secondary(self):
    """
    Expect a falsy result from `_parse_field` when index 3 and secondary
    index 4 are requested while the mocked HTML holds only three items.
    """
    scheme = {'batters_used': 'td[data-stat="batters_used"]'}
    markup = (
        '<td class="right " data-stat="batters_used">32</td> '
        '<td class="right " data-stat="age_bat">29.1</td> '
        '<td class="right " data-stat="runs_per_game">4.10</td> '
        '<td class="right " data-stat="batters_used">31</td> '
        '<td class="right " data-stat="batters_used">34</td>'
    )
    values = [32, 31, 34]
    html = MockHtml(markup, values)

    outcome = utils._parse_field(scheme, html, 'batters_used',
                                 index=3, secondary_index=4)
    assert not outcome
def _parse_game_data(self, uri):
    """
    Populate the instance's attributes from a boxscore page.

    Retrieves the boxscore page for the given link and then walks over every
    attribute stored on the instance. Depending on the attribute's name
    (without its leading underscore) the value comes from the date and
    location parser, the team name parser, a list of the text of every
    element matching the field's scheme entry, or a generic lookup with the
    boxscore parsing scheme, and the attribute is overwritten with it.
    Afterwards the away skater and away goalie counts are stored.

    Note that this method is called directly once Boxscore is invoked and
    does not need to be called manually.

    Parameters
    ----------
    uri : string
        The relative link to the boxscore HTML page, such as
        '201802040nwe'.
    """
    page = self._retrieve_html_page(uri)
    # Without a page the game likely hasn't been played yet, so there is
    # nothing to grab and the instance is simply left empty.
    if not page:
        return

    # Attributes that this method does not populate.
    skipped_fields = ('winner', 'winning_name', 'winning_abbr',
                      'losing_name', 'losing_abbr', 'uri')
    # Attributes resolved by the date and location parser.
    game_info_fields = ('date', 'time', 'arena', 'attendance',
                        'time_of_day', 'duration')
    # Attributes resolved by the team name parser.
    team_name_fields = ('away_name', 'home_name')
    # Attributes stored as a list with one text entry per matching element.
    multi_value_fields = (
        'away_even_strength_assists',
        'away_power_play_assists',
        'away_short_handed_assists',
        'away_game_winning_goals',
        'away_saves',
        'away_save_percentage',
        'away_shutout',
        'home_even_strength_assists',
        'home_power_play_assists',
        'home_short_handed_assists',
        'home_game_winning_goals',
        'home_saves',
        'home_save_percentage',
        'home_shutout',
    )

    for attribute in vars(self):
        # Remove the '_' from the name
        name = str(attribute)[1:]
        if name in skipped_fields:
            continue
        if name in game_info_fields:
            parsed = self._parse_game_date_and_location(name, page)
        elif name in team_name_fields:
            parsed = self._parse_name(name, page)
        elif name in multi_value_fields:
            selector = BOXSCORE_SCHEME[name]
            parsed = [element.text() for element in page(selector).items()]
        else:
            # Fields without a registered element index use the first match.
            position = BOXSCORE_ELEMENT_INDEX.get(name, 0)
            parsed = utils._parse_field(BOXSCORE_SCHEME, page, name,
                                        position)
        setattr(self, attribute, parsed)

    self._away_skaters = len(page(BOXSCORE_SCHEME['away_skaters']))
    goalie_tables = page(BOXSCORE_SCHEME['away_goalies']).items()
    # Skip the first element as it is dedicated to skaters and not goalies.
    next(goalie_tables)
    self._away_goalies = len(next(goalie_tables)('tbody tr'))