def test_second_request_is_from_cache(self):
    url = "https://afltables.com/afl/stats/playersA_idx.html"
    resp1 = requests.get(url)
    assert hasattr(resp1, "from_cache")
    assert not resp1.from_cache
    resp2 = requests.get(url)
    assert hasattr(resp2, "from_cache")
    assert resp2.from_cache
def test_all_requests_with_force_live_are_not_from_cache(self):
    url = "https://afltables.com/afl/stats/playersA_idx.html"
    resp1 = requests.get(url, force_live=True)
    assert not hasattr(resp1, "from_cache")
    resp2 = requests.get(url, force_live=True)
    assert not hasattr(resp2, "from_cache")
    resp3 = requests.get(url, force_live=True)
    assert not hasattr(resp3, "from_cache")
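# For context, a minimal sketch of the caching contract the two tests above
# exercise. This is an illustrative assumption, not pyAFL's actual
# implementation: a module-level dict keyed by URL, with `force_live`
# bypassing the cache so the response never gains a `from_cache` attribute.
import requests as _requests

_CACHE = {}  # hypothetical cache store, keyed by URL


def get(url, force_live=False):
    if force_live:
        # Bypass the cache entirely; the plain response carries no
        # `from_cache` attribute, which is what the second test asserts.
        return _requests.get(url)
    if url in _CACHE:
        resp = _CACHE[url]
        resp.from_cache = True  # repeat request served from the cache
    else:
        resp = _requests.get(url)
        resp.from_cache = False  # first request is always live
        _CACHE[url] = resp
    return resp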
def get_player_stats(self):
    """
    Returns player stats as per the player stats page defined in
    `self._get_player_url()`

    Returns
    ----------
    stats : obj
        player stats Python object
    """
    resp = requests.get(self.url)
    self._stat_html = resp.text
    all_dfs = pd.read_html(self._stat_html)
    # Season game-log tables have captions of the form "<Team> - <year>".
    season_dfs = pd.read_html(self._stat_html, match=r"[A-Za-z]* - [0-9]{4}")
    season_stats_total = all_dfs[0]    # first table on the page: career totals
    season_stats_average = all_dfs[1]  # second table on the page: career averages
    ret = PlayerStats(
        season_stats_total=season_stats_total,
        season_stats_average=season_stats_average,
        season_results=season_dfs,
    )
    return ret
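# A possible usage sketch for get_player_stats(). The import path and the
# Player constructor signature are assumptions (inferred from _get_players()
# below); the player name is illustrative.
from pyAFL.players import Player

player = Player("Tom Mitchell")           # name in '[first] [last]' format
stats = player.get_player_stats()
print(stats.season_stats_total.head())    # career totals table
print(stats.season_stats_average.head())  # career averages table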
def _get_player_url(self):
    last_initial = self.name.split(" ")[1][0]
    player_list_url = (
        config.AFLTABLES_STATS_BASE_URL + f"stats/players{last_initial}_idx.html"
    )
    resp = requests.get(player_list_url)
    soup = BeautifulSoup(resp.text, "html.parser")
    url_list = soup.findAll(
        "a",
        href=re.compile(f"players/{self.name[0]}/{self.name.replace(' ', '_')}"),
    )
    # If no matches are found, raise a LookupError.
    if len(url_list) == 0:
        raise LookupError(
            f"Found no players with name {self.name}. Browse "
            "https://afltables.com/afl/stats/playersA_idx.html for a list of "
            "all players. Name must be in format '[first] [last]'."
        )
    # If more than one player matches, print a warning and return the first.
    if len(url_list) > 1:
        print(
            f"Warning: {len(url_list)} players have been found for name: "
            f"{self.name}. Returning only the first."
        )
    return url_list[0].attrs.get("href")
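# To illustrate the href pattern _get_player_url() matches, a standalone
# sketch with hypothetical hrefs (the real index page markup may differ):
import re

name = "Tom Mitchell"
pattern = re.compile(f"players/{name[0]}/{name.replace(' ', '_')}")

hrefs = [
    "players/T/Tom_Mitchell.html",  # matches: first initial plus underscored name
    "players/S/Sam_Mitchell.html",  # no match: different initial and name
]
print([h for h in hrefs if pattern.search(h)])  # ['players/T/Tom_Mitchell.html']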
def _get_games(self):
    """
    Returns a Pandas dataframe listing every match contained in
    `self.all_time_games_url`

    Returns
    ----------
    games : Pandas dataframe
        dataframe listing all games played by the team. Contains results
        and match metadata.
    """
    resp = requests.get(self.all_time_games_url)
    soup = BeautifulSoup(resp.text, "html.parser")
    seasons = soup.findAll("table")
    dfs = []
    for season_html in seasons:
        df = pd.read_html(str(season_html))[0]
        df.columns = df.columns.droplevel(1)  # drop the second header level
        df = df.iloc[0:-2, :]                 # drop the last two (non-game) rows
        dfs.append(df)
    games = pd.concat(dfs)
    games.index = pd.to_datetime(games.Date)
    games = games.sort_index()
    games = games.rename(
        columns={"A": "Against", "F": "For", "R": "Result", "M": "Margin"}
    )
    return games
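# Possible usage sketch. The Team class name and constructor are assumptions;
# the column names come from the rename in _get_games() above, and the 'W'
# result code is assumed from the site's W/L/D notation.
team = Team("Adelaide")
games = team._get_games()
wins = games[games["Result"] == "W"]
print(f"{len(wins)} wins from {len(games)} games")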
def season_stats(self, year: int):
    """
    Returns a Pandas dataframe detailing the season stats for the specified
    year. E.g. for Adelaide the table found at
    https://afltables.com/afl/stats/2020.html#1

    Parameters
    ----------
    year : int (required)
        year as a four-digit integer (e.g. 2019)

    Returns
    ----------
    season_stats : Pandas dataframe
        dataframe summarising individual player (and team total) stats
        for the specified year.
    """
    season_player_stats_url = f"https://afltables.com/afl/stats/{year}.html"
    resp = requests.get(season_player_stats_url)
    if resp.status_code == 404:
        raise Exception(f"Could not find season stats for year: {year}")
    soup = BeautifulSoup(resp.text, "html.parser")
    team_tables = soup.findAll("table")
    for table in team_tables:
        if table.find("th") and self.name in table.find("th").text:
            # pd.read_html raises rather than returning None, so the team
            # table is read directly once its header matches.
            season_stats = pd.read_html(str(table))[0]
            season_stats.columns = season_stats.columns.droplevel()
            return season_stats
    # No table header matched the team name.
    raise LookupError(
        f"Could not find season stats table for team {self.name} in year "
        f"{year} at URL {season_player_stats_url}"
    )
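# Possible usage sketch (Team constructor assumed as above).
team = Team("Adelaide")
stats_2020 = team.season_stats(2020)  # per-player rows plus a team-totals row
print(stats_2020.head())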
def _get_players(self):
    """
    Returns a list of pyAFL.Player objects for all players contained in
    `self.all_time_players_url`

    Returns
    ----------
    players : list
        list of pyAFL.Player objects
    """
    resp = requests.get(self.all_time_players_url)
    soup = BeautifulSoup(resp.text, "html.parser")
    player_table = soup.find("table")
    player_table_body = player_table.find("tbody")
    player_anchor_tags = player_table_body.findAll("a")
    players = [
        Player(player.text, url=player.attrs.get("href"))
        for player in player_anchor_tags
    ]
    return players
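# Possible usage sketch (Team constructor assumed; Player is presumed to keep
# its constructor argument as a `name` attribute).
team = Team("Adelaide")
players = team._get_players()
print([p.name for p in players][:5])  # first five all-time players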
def test_invalid_url_throws_exception(self):
    with pytest.raises(AttributeError):
        requests.get("https://abcdefgh")

def test_missing_url_schema_throws_exception(self):
    with pytest.raises(AttributeError):
        requests.get("abcdefgh")

def test_none_url_throws_exception(self):
    with pytest.raises(TypeError):
        requests.get()