Exemple #1
0
def schedule_for_month(url):
    """Fetch the schedule page at ``url`` and return its parsed games.

    ``raise_for_status()`` is called on the response before any parsing is
    attempted. The return value is whatever
    ``ScheduledGamesParser.parse_games`` produces for the page's rows.
    """
    http_response = requests.get(url=url)
    http_response.raise_for_status()

    # Parse the raw response bytes into an lxml tree and wrap it in the page
    # object that exposes the schedule rows.
    document = html.fromstring(html=http_response.content)
    schedule_page = SchedulePage(html=document)

    start_times = ScheduledStartTimeParser()
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
    games_parser = ScheduledGamesParser(
        start_time_parser=start_times,
        team_name_parser=team_names,
    )

    schedule_rows = schedule_page.rows
    return games_parser.parse_games(games=schedule_rows)
 def setUp(self):
     """Load the HTML inputs and build the parser used by the tests.

     Two pages are downloaded with ``requests`` and one fixture is read from
     a file next to this module. The fixture's file object is stored on the
     instance and is NOT closed here.
     """
     october_2001_url = 'https://www.basketball-reference.com/leagues/NBA_2001_games.html'
     october_2018_url = 'https://www.basketball-reference.com/leagues/NBA_2018_games-october.html'
     self.october_2001_html = requests.get(october_2001_url).text
     self.october_2018_html = requests.get(october_2018_url).text

     # The fixture path is resolved relative to this file's directory.
     fixture_directory = os.path.dirname(__file__)
     fixture_path = os.path.join(fixture_directory,
                                 './NBA_2019_games-april.html')
     fixture_file = open(fixture_path)
     self.schedule_with_future_games_html_file = fixture_file
     self.schedule_with_future_games_html = fixture_file.read()

     start_times = ScheduledStartTimeParser()
     team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
     self.parser = ScheduledGamesParser(
         start_time_parser=start_times,
         team_name_parser=team_names,
     )
Exemple #3
0
def season_schedule(season_end_year):
    """Return the parsed games for every schedule page of one season.

    The season's main schedule page is fetched and parsed first. Each path
    listed in that page's ``other_months_schedule_urls`` is then fetched via
    ``schedule_for_month`` and its games are appended to the same list.

    ``raise_for_status()`` is called on the first response before parsing.
    """
    first_page_url = '{BASE_URL}/leagues/NBA_{season_end_year}_games.html'.format(
        BASE_URL=BASE_URL, season_end_year=season_end_year)

    http_response = requests.get(url=first_page_url)
    http_response.raise_for_status()

    schedule_page = SchedulePage(
        html=html.fromstring(html=http_response.content))
    games_parser = ScheduledGamesParser(
        start_time_parser=ScheduledStartTimeParser(),
        team_name_parser=TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM),
    )

    all_games = games_parser.parse_games(games=schedule_page.rows)

    # The remaining months are linked from the first page as URL paths, so
    # each one is prefixed with BASE_URL before being fetched.
    for path in schedule_page.other_months_schedule_urls:
        month_url = '{BASE_URL}{month_url_path}'.format(
            BASE_URL=BASE_URL, month_url_path=path)
        all_games.extend(schedule_for_month(url=month_url))

    return all_games
class TestScheduleParser(TestCase):
    """Exercise SchedulePage / ScheduledGamesParser on schedule HTML.

    Two inputs are downloaded from basketball-reference.com in ``setUp`` and
    one is read from an HTML fixture stored next to this module.
    """

    def setUp(self):
        """Load the three HTML inputs and build the parser under test."""
        self.october_2001_html = requests.get(
            'https://www.basketball-reference.com/leagues/NBA_2001_games.html'
        ).text
        self.october_2018_html = requests.get(
            'https://www.basketball-reference.com/leagues/NBA_2018_games-october.html'
        ).text

        # Read the fixture inside a context manager so the handle is closed
        # immediately. unittest does not call tearDown() when setUp() raises,
        # so a handle kept open for tearDown to close could leak if a later
        # step of setUp failed.
        fixture_path = os.path.join(os.path.dirname(__file__),
                                    './NBA_2019_games-april.html')
        with open(fixture_path) as fixture_file:
            self.schedule_with_future_games_html = fixture_file.read()

        self.parser = ScheduledGamesParser(
            start_time_parser=ScheduledStartTimeParser(),
            team_name_parser=TeamNameParser(
                team_names_to_teams=TEAM_NAME_TO_TEAM),
        )

    def test_parse_october_2001_schedule_for_month_url_paths_(self):
        # The first page of the 2001 season should link to every other month.
        page = SchedulePage(html=html.fromstring(self.october_2001_html))
        expected_urls = [
            "/leagues/NBA_2001_games-november.html",
            "/leagues/NBA_2001_games-december.html",
            "/leagues/NBA_2001_games-january.html",
            "/leagues/NBA_2001_games-february.html",
            "/leagues/NBA_2001_games-march.html",
            "/leagues/NBA_2001_games-april.html",
            "/leagues/NBA_2001_games-may.html",
            "/leagues/NBA_2001_games-june.html",
        ]
        self.assertEqual(page.other_months_schedule_urls, expected_urls)

    def test_parse_october_2001_schedule(self):
        page = SchedulePage(html=html.fromstring(self.october_2001_html))
        parsed_schedule = self.parser.parse_games(games=page.rows)
        first_game = parsed_schedule[0]
        expected_datetime = pytz.timezone("US/Eastern") \
            .localize(datetime(year=2000, month=10, day=31, hour=19, minute=30)) \
            .astimezone(pytz.utc)

        self.assertEqual(len(parsed_schedule), 13)
        # Compare with a one-second tolerance; assertLess reports both
        # operands on failure, unlike assertTrue on a boolean expression.
        self.assertLess(
            abs(first_game["start_time"] - expected_datetime),
            timedelta(seconds=1))
        self.assertEqual(first_game["away_team"], Team.CHARLOTTE_HORNETS)
        self.assertEqual(first_game["home_team"], Team.ATLANTA_HAWKS)
        self.assertEqual(first_game["away_team_score"], 106)
        self.assertEqual(first_game["home_team_score"], 82)

    def test_parse_october_2018_schedule(self):
        page = SchedulePage(html=html.fromstring(self.october_2018_html))
        parsed_schedule = self.parser.parse_games(games=page.rows)
        self.assertEqual(len(parsed_schedule), 104)

    def test_parse_future_game(self):
        # Games that have not been played yet must parse with None scores.
        page = SchedulePage(
            html=html.fromstring(self.schedule_with_future_games_html))
        parsed_schedule = self.parser.parse_games(games=page.rows)

        # Check for None before indexing; indexing first would raise a
        # TypeError and this assertion could never be the one to fail.
        self.assertIsNotNone(parsed_schedule)
        self.assertEqual(len(parsed_schedule), 79)

        first_game = parsed_schedule[0]
        expected_first_game_start_time = pytz.timezone("US/Eastern") \
            .localize(datetime(year=2019, month=4, day=1, hour=19, minute=30)) \
            .astimezone(pytz.utc)

        self.assertEqual(first_game["start_time"],
                         expected_first_game_start_time)
        self.assertEqual(first_game["away_team"], Team.MIAMI_HEAT)
        self.assertEqual(first_game["home_team"], Team.BOSTON_CELTICS)
        self.assertIsNone(first_game["away_team_score"])
        self.assertIsNone(first_game["home_team_score"])
    def __init__(self):
        """Create every parser and wire shared instances into the composites.

        Each parser is stored as an attribute on ``self``. Parsers that are
        dependencies of other parsers are also bound to a local name so the
        same instance is passed on, rather than a new one being built.
        """
        # Parsers configured from externally defined constants or from
        # ParserService class attributes.
        self.team_abbreviation_parser = team_abbreviations = TeamAbbreviationParser(
            abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM,
        )
        self.league_abbreviation_parser = league_abbreviations = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
        )
        self.location_abbreviation_parser = location_abbreviations = LocationAbbreviationParser(
            abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION,
        )
        self.outcome_abbreviation_parser = outcome_abbreviations = OutcomeAbbreviationParser(
            abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME,
        )
        self.outcome_parser = outcomes = PlayerBoxScoreOutcomeParser(
            outcome_abbreviation_parser=outcome_abbreviations,
        )
        self.period_details_parser = period_details = PeriodDetailsParser(
            regulation_periods_count=4,
        )
        self.period_timestamp_parser = period_timestamps = PeriodTimestampParser(
            timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT,
        )
        self.position_abbreviation_parser = position_abbreviations = PositionAbbreviationParser(
            abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION,
        )
        self.seconds_played_parser = seconds_played = SecondsPlayedParser()
        self.scores_parser = scores = ScoresParser(
            scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX,
        )
        self.search_result_name_parser = result_names = SearchResultNameParser()
        self.search_result_location_parser = result_locations = ResourceLocationParser(
            resource_location_regex=ParserService.SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
        )
        self.team_name_parser = team_names = TeamNameParser(
            team_names_to_teams=TEAM_NAME_TO_TEAM,
        )

        # Composite parsers assembled from the instances created above.
        self.play_by_plays_parser = PlayByPlaysParser(
            period_details_parser=period_details,
            period_timestamp_parser=period_timestamps,
            scores_parser=scores,
        )
        self.player_box_scores_parser = PlayerBoxScoresParser(
            team_abbreviation_parser=team_abbreviations,
            location_abbreviation_parser=location_abbreviations,
            outcome_abbreviation_parser=outcome_abbreviations,
            seconds_played_parser=seconds_played,
        )
        self.player_data_parser = PlayerDataParser(
            search_result_location_parser=result_locations,
            league_abbreviation_parser=league_abbreviations,
        )
        self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
            team_abbreviation_parser=team_abbreviations,
            location_abbreviation_parser=location_abbreviations,
            outcome_parser=outcomes,
            seconds_played_parser=seconds_played,
        )
        self.player_season_totals_parser = PlayerSeasonTotalsParser(
            position_abbreviation_parser=position_abbreviations,
            team_abbreviation_parser=team_abbreviations,
        )
        self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
            team_abbreviation_parser=team_abbreviations,
            position_abbreviation_parser=position_abbreviations,
        )

        self.scheduled_start_time_parser = start_times = ScheduledStartTimeParser()
        self.scheduled_games_parser = ScheduledGamesParser(
            start_time_parser=start_times,
            team_name_parser=team_names,
        )
        self.search_results_parser = SearchResultsParser(
            search_result_name_parser=result_names,
            search_result_location_parser=result_locations,
            league_abbreviation_parser=league_abbreviations,
        )
        self.team_totals_parser = TeamTotalsParser(
            team_abbreviation_parser=team_abbreviations,
        )

        self.division_name_parser = division_names = DivisionNameParser(
            divisions=Division,
        )
        self.team_standings_parser = team_standings = TeamStandingsParser(
            teams=Team,
        )
        self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
            division_name_parser=division_names,
            team_standings_parser=team_standings,
            divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
        )
class ParserService:
    """Build all parsers once and expose thin ``parse_*`` delegating methods.

    The constructor creates each individual parser, sharing dependency
    instances between the composite parsers. Every ``parse_*`` method
    forwards its arguments to one of those parsers and returns its result.
    """

    # strptime-style format handed to PeriodTimestampParser.
    PLAY_BY_PLAY_TIMESTAMP_FORMAT = "%M:%S.%f"

    # Two dash-separated integers captured as away_team_score / home_team_score.
    PLAY_BY_PLAY_SCORES_REGEX = r"(?P<away_team_score>[0-9]+)-(?P<home_team_score>[0-9]+)"

    # Optional scheme+host prefix, then named groups resource_type and
    # resource_identifier. Written as a raw string: the pattern contains
    # ``\/`` and ``\.``, which are invalid escape sequences in a normal string
    # literal and trigger a warning on current Python versions. The runtime
    # value is unchanged.
    # NOTE(review): the "." before "html" is unescaped, so it matches any
    # character, not only a literal dot. Left as-is to preserve behavior.
    SEARCH_RESULT_RESOURCE_LOCATION_REGEX = r'(https?:\/\/www\.basketball-reference\.com\/)?(?P<resource_type>.+?(?=\/)).*\/(?P<resource_identifier>.+).html'

    def __init__(self):
        """Create every parser and wire the shared instances together."""
        # Parsers that other parsers depend on are created first.
        self.team_abbreviation_parser = TeamAbbreviationParser(
            abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM)
        self.league_abbreviation_parser = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE)
        self.location_abbreviation_parser = LocationAbbreviationParser(
            abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION, )
        self.outcome_abbreviation_parser = OutcomeAbbreviationParser(
            abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME, )
        self.outcome_parser = PlayerBoxScoreOutcomeParser(
            outcome_abbreviation_parser=self.outcome_abbreviation_parser)
        self.period_details_parser = PeriodDetailsParser(
            regulation_periods_count=4)
        self.period_timestamp_parser = PeriodTimestampParser(
            timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT)
        self.position_abbreviation_parser = PositionAbbreviationParser(
            abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION, )
        self.seconds_played_parser = SecondsPlayedParser()
        self.scores_parser = ScoresParser(
            scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX)
        self.search_result_name_parser = SearchResultNameParser()
        self.search_result_location_parser = ResourceLocationParser(
            resource_location_regex=ParserService.
            SEARCH_RESULT_RESOURCE_LOCATION_REGEX)
        self.team_name_parser = TeamNameParser(
            team_names_to_teams=TEAM_NAME_TO_TEAM)

        # Composite parsers reuse the instances created above.
        self.play_by_plays_parser = PlayByPlaysParser(
            period_details_parser=self.period_details_parser,
            period_timestamp_parser=self.period_timestamp_parser,
            scores_parser=self.scores_parser,
        )
        self.player_box_scores_parser = PlayerBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_abbreviation_parser=self.outcome_abbreviation_parser,
            seconds_played_parser=self.seconds_played_parser)
        self.player_data_parser = PlayerDataParser(
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_parser=self.outcome_parser,
            seconds_played_parser=self.seconds_played_parser)
        self.player_season_totals_parser = PlayerSeasonTotalsParser(
            position_abbreviation_parser=self.position_abbreviation_parser,
            team_abbreviation_parser=self.team_abbreviation_parser,
        )
        self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            position_abbreviation_parser=self.position_abbreviation_parser,
        )
        self.scheduled_start_time_parser = ScheduledStartTimeParser()
        self.scheduled_games_parser = ScheduledGamesParser(
            start_time_parser=self.scheduled_start_time_parser,
            team_name_parser=self.team_name_parser,
        )
        self.search_results_parser = SearchResultsParser(
            search_result_name_parser=self.search_result_name_parser,
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.team_totals_parser = TeamTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser)
        self.division_name_parser = DivisionNameParser(divisions=Division)
        self.team_standings_parser = TeamStandingsParser(teams=Team)
        self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
            division_name_parser=self.division_name_parser,
            team_standings_parser=self.team_standings_parser,
            divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
        )

    def parse_division_standings(self, standings):
        """Delegate ``standings`` to the conference/division standings parser."""
        return self.conference_division_standings_parser.parse(
            division_standings=standings)

    def parse_play_by_plays(self, play_by_plays, away_team_name,
                            home_team_name):
        """Parse play-by-play rows after resolving both team names.

        ``away_team_name`` and ``home_team_name`` are each passed through
        ``team_name_parser.parse_team_name`` before being handed to the
        play-by-play parser.
        """
        return self.play_by_plays_parser.parse(
            play_by_plays=play_by_plays,
            away_team=self.team_name_parser.parse_team_name(
                team_name=away_team_name),
            home_team=self.team_name_parser.parse_team_name(
                team_name=home_team_name),
        )

    def parse_player_box_scores(self, box_scores):
        """Delegate ``box_scores`` to the player box scores parser."""
        return self.player_box_scores_parser.parse(box_scores=box_scores)

    def parse_player_season_box_scores(self, box_scores):
        """Delegate ``box_scores`` to the player season box scores parser."""
        return self.player_season_box_scores_parser.parse(
            box_scores=box_scores)

    def parse_player_advanced_season_totals_parser(self, totals):
        """Delegate ``totals`` to the player advanced season totals parser."""
        return self.player_advanced_season_totals_parser.parse(totals=totals)

    def parse_player_season_totals(self, totals):
        """Delegate ``totals`` to the player season totals parser."""
        return self.player_season_totals_parser.parse(totals=totals)

    def parse_scheduled_games(self, games):
        """Delegate ``games`` to the scheduled games parser."""
        # Passed by keyword, like every other delegating call in this class.
        return self.scheduled_games_parser.parse_games(games=games)

    def parse_team_totals(self, first_team_totals, second_team_totals):
        """Delegate both teams' totals to the team totals parser."""
        return self.team_totals_parser.parse(
            first_team_totals=first_team_totals,
            second_team_totals=second_team_totals)

    def parse_player_search_results(self, nba_aba_baa_players):
        """Delegate ``nba_aba_baa_players`` to the search results parser."""
        return self.search_results_parser.parse(
            nba_aba_baa_players=nba_aba_baa_players)

    def parse_player_data(self, player):
        """Delegate ``player`` to the player data parser."""
        return self.player_data_parser.parse(player=player)