def schedule_for_month(self, url):
    """Download one monthly schedule page and parse its game rows.

    :param url: address of the monthly schedule page to request.
    :return: the result of ``self.parser.parse_scheduled_games`` applied
        to the rows of the downloaded page.
    :raises: whatever ``raise_for_status()`` raises when the response
        carries an unsuccessful status code.
    """
    # NOTE(review): no timeout is passed to requests.get, so a stalled
    # server can block this call -- confirm that is acceptable to callers.
    month_response = requests.get(url=url)
    month_response.raise_for_status()

    document = html.fromstring(html=month_response.content)
    month_page = SchedulePage(html=document)
    return self.parser.parse_scheduled_games(games=month_page.rows)
def schedule_for_month(url):
    """Download one monthly schedule page and parse its game rows.

    A fresh ``ScheduledGamesParser`` is assembled on every call from a
    ``ScheduledStartTimeParser`` and a ``TeamNameParser`` backed by
    ``TEAM_NAME_TO_TEAM``.

    :param url: address of the monthly schedule page to request.
    :return: the result of ``parse_games`` applied to the page's rows.
    :raises: whatever ``raise_for_status()`` raises when the response
        carries an unsuccessful status code.
    """
    # NOTE(review): no timeout is passed to requests.get, so a stalled
    # server can block this call -- confirm that is acceptable to callers.
    month_response = requests.get(url=url)
    month_response.raise_for_status()

    document = html.fromstring(html=month_response.content)
    month_page = SchedulePage(html=document)

    start_times = ScheduledStartTimeParser()
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
    games_parser = ScheduledGamesParser(
        start_time_parser=start_times,
        team_name_parser=team_names,
    )
    return games_parser.parse_games(games=month_page.rows)
def test_all_rows_are_returned_when_all_rows_have_playoffs_content(
        self, mocked_rows_query):
    """A row whose text is not "Playoffs" is returned wrapped in a ScheduleRow.

    NOTE(review): the test name says the rows *have* playoffs content,
    but the stubbed row's text is "jaebaebae"; the name looks like a
    copy-paste slip -- confirm before renaming.

    :param mocked_rows_query: mock supplied from outside this block
        (presumably a patch of ``SchedulePage.rows_query`` -- confirm).
    """
    stubbed_query = "some query"
    mocked_rows_query.return_value = stubbed_query

    regular_row = MagicMock()
    regular_row.text_content = MagicMock(return_value="jaebaebae")
    self.html.xpath = MagicMock(return_value=[regular_row])

    actual_rows = SchedulePage(html=self.html).rows
    expected_rows = [ScheduleRow(html=regular_row)]
    self.assertEqual(actual_rows, expected_rows)
def test_no_rows_are_returned_when_all_rows_have_playoffs_content(
        self, mocked_rows_query):
    """A row whose text is exactly "Playoffs" is left out of ``rows``.

    Also checks that the page ran the stubbed query once and read the
    row's text content once.

    :param mocked_rows_query: mock supplied from outside this block
        (presumably a patch of ``SchedulePage.rows_query`` -- confirm).
    """
    stubbed_query = "some query"
    mocked_rows_query.return_value = stubbed_query

    header_row = MagicMock()
    header_row.text_content = MagicMock(return_value="Playoffs")
    self.html.xpath = MagicMock(return_value=[header_row])

    actual_rows = SchedulePage(html=self.html).rows
    self.assertEqual(actual_rows, [])

    self.html.xpath.assert_called_once_with(stubbed_query)
    header_row.text_content.assert_called_once_with()
def test_parse_october_2001_schedule_for_month_url_paths_(self):
    """The October 2001 fixture links to the other months, November to June."""
    document = html.fromstring(self.october_2001_html)
    october_page = SchedulePage(html=document)

    # Expected link targets, in the order the page is expected to list them.
    remaining_month_paths = [
        "/leagues/NBA_2001_games-november.html",
        "/leagues/NBA_2001_games-december.html",
        "/leagues/NBA_2001_games-january.html",
        "/leagues/NBA_2001_games-february.html",
        "/leagues/NBA_2001_games-march.html",
        "/leagues/NBA_2001_games-april.html",
        "/leagues/NBA_2001_games-may.html",
        "/leagues/NBA_2001_games-june.html",
    ]

    self.assertEqual(
        october_page.other_months_schedule_urls,
        remaining_month_paths,
    )
def test_parse_october_2001_schedule(self):
    """Parse the October 2001 fixture and verify its size and first game.

    The game count is asserted before the first game is indexed, so an
    empty parse result fails with a readable assertion message rather
    than an ``IndexError``.
    """
    page = SchedulePage(html=html.fromstring(self.october_2001_html))
    parsed_schedule = self.parser.parse_games(games=page.rows)

    # Expected tip-off: 7:30 PM US/Eastern on 2000-10-31, converted to UTC.
    expected_datetime = pytz.timezone("US/Eastern") \
        .localize(datetime(year=2000, month=10, day=31, hour=19, minute=30)) \
        .astimezone(pytz.utc)

    self.assertEqual(len(parsed_schedule), 13)

    first_game = parsed_schedule[0]

    # Compare with a one-second tolerance instead of strict equality;
    # assertLess reports both operands when the comparison fails.
    self.assertLess(
        abs(first_game["start_time"] - expected_datetime),
        timedelta(seconds=1),
    )
    self.assertEqual(first_game["away_team"], Team.CHARLOTTE_HORNETS)
    self.assertEqual(first_game["home_team"], Team.ATLANTA_HAWKS)
    self.assertEqual(first_game["away_team_score"], 106)
    self.assertEqual(first_game["home_team_score"], 82)
def test_other_months_schedule_urls(
        self, mocked_other_months_schedule_links_query):
    """``other_months_schedule_urls`` yields the ``href`` of each link found.

    Also checks that the page ran the stubbed query once and looked up
    ``'href'`` on the link's attributes once.

    :param mocked_other_months_schedule_links_query: mock supplied from
        outside this block (presumably a patch of
        ``SchedulePage.other_months_schedule_links_query`` -- confirm).
    """
    stubbed_query = "some query"
    mocked_other_months_schedule_links_query.return_value = stubbed_query

    href_value = "some link href"
    anchor = MagicMock()
    anchor.attrib = MagicMock()
    # Stub item access so anchor.attrib['href'] returns the canned value.
    anchor.attrib.__getitem__ = MagicMock(return_value=href_value)
    self.html.xpath = MagicMock(return_value=[anchor])

    actual_urls = SchedulePage(html=self.html).other_months_schedule_urls
    self.assertEqual(actual_urls, [href_value])

    self.html.xpath.assert_called_once_with(stubbed_query)
    anchor.attrib.__getitem__.assert_called_once_with('href')
def test_parse_future_game(self):
    """Parse the future-games fixture and verify its size and first game.

    The not-None and length checks run before the first game is
    indexed; placed after the indexing they could never report the
    failure they guard against, because ``parsed_schedule[0]`` would
    raise first. Scores of the first game are expected to be ``None``.
    """
    page = SchedulePage(
        html=html.fromstring(self.schedule_with_future_games_html))
    parsed_schedule = self.parser.parse_games(games=page.rows)

    # Expected tip-off: 7:30 PM US/Eastern on 2019-04-01, converted to UTC.
    expected_first_game_start_time = pytz.timezone("US/Eastern") \
        .localize(datetime(year=2019, month=4, day=1, hour=19, minute=30)) \
        .astimezone(pytz.utc)

    # Guard assertions first, then inspect the first game.
    self.assertIsNotNone(parsed_schedule)
    self.assertEqual(len(parsed_schedule), 79)

    first_game = parsed_schedule[0]
    self.assertEqual(first_game["start_time"], expected_first_game_start_time)
    self.assertEqual(first_game["away_team"], Team.MIAMI_HEAT)
    self.assertEqual(first_game["home_team"], Team.BOSTON_CELTICS)
    self.assertIsNone(first_game["away_team_score"])
    self.assertIsNone(first_game["home_team_score"])
def season_schedule(self, season_end_year):
    """Collect the parsed schedule for a whole season.

    Requests the season's main schedule page, parses its rows, then
    follows each link the page exposes via ``other_months_schedule_urls``
    and appends that month's parsed games through
    ``self.schedule_for_month``.

    :param season_end_year: value substituted into the
        ``NBA_{season_end_year}_games.html`` path.
    :return: the list returned by ``self.parser.parse_scheduled_games``
        for the first page, extended in place with every other month's
        games.
    :raises: whatever ``raise_for_status()`` raises when a response
        carries an unsuccessful status code.
    """
    season_url = '{BASE_URL}/leagues/NBA_{season_end_year}_games.html'.format(
        BASE_URL=HTTPService.BASE_URL,
        season_end_year=season_end_year,
    )
    # NOTE(review): no timeout is passed to requests.get -- confirm.
    landing_response = requests.get(url=season_url)
    landing_response.raise_for_status()

    document = html.fromstring(html=landing_response.content)
    landing_page = SchedulePage(html=document)
    games = self.parser.parse_scheduled_games(games=landing_page.rows)

    # The page's links are site-relative paths, so prefix the base URL.
    for path in landing_page.other_months_schedule_urls:
        month_url = '{BASE_URL}{month_url_path}'.format(
            BASE_URL=HTTPService.BASE_URL,
            month_url_path=path,
        )
        games.extend(self.schedule_for_month(url=month_url))

    return games
def season_schedule(season_end_year):
    """Collect the parsed schedule for a whole season.

    Requests the season's main schedule page, parses its rows with a
    freshly built ``ScheduledGamesParser``, then follows each link the
    page exposes via ``other_months_schedule_urls`` and appends that
    month's games through ``schedule_for_month``.

    :param season_end_year: value substituted into the
        ``NBA_{season_end_year}_games.html`` path.
    :return: the list returned by ``parse_games`` for the first page,
        extended in place with every other month's games.
    :raises: whatever ``raise_for_status()`` raises when a response
        carries an unsuccessful status code.
    """
    season_url = '{BASE_URL}/leagues/NBA_{season_end_year}_games.html'.format(
        BASE_URL=BASE_URL,
        season_end_year=season_end_year,
    )
    # NOTE(review): no timeout is passed to requests.get -- confirm.
    landing_response = requests.get(url=season_url)
    landing_response.raise_for_status()

    document = html.fromstring(html=landing_response.content)
    landing_page = SchedulePage(html=document)

    start_times = ScheduledStartTimeParser()
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
    games_parser = ScheduledGamesParser(
        start_time_parser=start_times,
        team_name_parser=team_names,
    )
    games = games_parser.parse_games(games=landing_page.rows)

    # The page's links are site-relative paths, so prefix the base URL.
    for path in landing_page.other_months_schedule_urls:
        month_url = '{BASE_URL}{month_url_path}'.format(
            BASE_URL=BASE_URL,
            month_url_path=path,
        )
        games.extend(schedule_for_month(url=month_url))

    return games
def test_parse_october_2018_schedule(self):
    """Parsing the October 2018 fixture yields 104 games."""
    document = html.fromstring(self.october_2018_html)
    october_page = SchedulePage(html=document)

    games = self.parser.parse_games(games=october_page.rows)

    self.assertEqual(len(games), 104)
def test_rows_query(self):
    """``rows_query`` is the XPath selecting the schedule table's body rows."""
    page = SchedulePage(html=self.html)
    self.assertEqual(page.rows_query, '//table[@id="schedule"]//tbody/tr')
def test_other_months_schedule_links_query(self):
    """The links query selects anchors under non-"current" filter divs."""
    page = SchedulePage(html=self.html)
    actual_query = page.other_months_schedule_links_query
    self.assertEqual(
        actual_query,
        '//div[@id="content"]/div[@class="filter"]/div[not(contains(@class, "current"))]/a',
    )