def play_by_play(home_team, day, month, year):
    """Download and parse the play-by-play page for one home game.

    The page URL is assembled from ``BASE_URL``, the date parts and the
    abbreviation looked up for ``home_team`` in ``TEAM_TO_TEAM_ABBREVIATION``.

    :param home_team: key into ``TEAM_TO_TEAM_ABBREVIATION`` for the home side
    :param day: day of the month; converted with ``str`` and padded to 2 chars
    :param month: month number; converted with ``str`` and padded to 2 chars
    :param year: year, inserted into the URL unchanged
    :return: whatever ``PlayByPlaysParser.parse`` returns for the page's rows
    :raises: whatever ``response.raise_for_status()`` raises for an
        unsuccessful HTTP response
    """

    def zero_padded(text):
        # Only single-character values get a leading "0"; everything else
        # passes through untouched.
        if len(text) == 1:
            return "0" + text
        return text

    month_text = zero_padded(str(month))
    day_text = zero_padded(str(day))
    team_abbreviation = TEAM_TO_TEAM_ABBREVIATION[home_team]

    # The hard-coded `0` in the url assumes we always take the first match
    # of the given date and team.
    url_template = "{BASE_URL}/boxscores/pbp/{year}{month}{day}0{team_abbr}.html"
    url = url_template.format(
        BASE_URL=BASE_URL,
        year=year,
        month=month_text,
        day=day_text,
        team_abbr=team_abbreviation,
    )

    response = requests.get(url=url)
    response.raise_for_status()

    document = html.fromstring(response.content)
    page = PlayByPlayPage(html=document)

    # Same collaborators as before, built in the same order.
    period_details = PeriodDetailsParser(regulation_periods_count=4)
    period_timestamps = PeriodTimestampParser(
        timestamp_format=PLAY_BY_PLAY_TIMESTAMP_FORMAT,
    )
    scores = ScoresParser(scores_regex=PLAY_BY_PLAY_SCORES_REGEX)
    play_by_plays_parser = PlayByPlaysParser(
        period_details_parser=period_details,
        period_timestamp_parser=period_timestamps,
        scores_parser=scores,
    )
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)

    rows = page.play_by_play_table.rows
    away = team_names.parse_team_name(team_name=page.away_team_name)
    home = team_names.parse_team_name(team_name=page.home_team_name)
    return play_by_plays_parser.parse(
        play_by_plays=rows,
        away_team=away,
        home_team=home,
    )
def schedule_for_month(url):
    """Download one schedule page and parse the games listed on it.

    :param url: full URL of the schedule page to fetch
    :return: whatever ``ScheduledGamesParser.parse_games`` returns for the
        page's rows
    :raises: whatever ``response.raise_for_status()`` raises for an
        unsuccessful HTTP response
    """
    response = requests.get(url=url)
    response.raise_for_status()

    document = html.fromstring(html=response.content)
    schedule_page = SchedulePage(html=document)

    start_times = ScheduledStartTimeParser()
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
    games_parser = ScheduledGamesParser(
        start_time_parser=start_times,
        team_name_parser=team_names,
    )
    return games_parser.parse_games(games=schedule_page.rows)
def setUp(self):
    """Prepare the HTML inputs and the parser used by the tests.

    Two schedule pages are downloaded over HTTP and a third is read from a
    file that sits next to this module.
    """
    season_2001_response = requests.get(
        'https://www.basketball-reference.com/leagues/NBA_2001_games.html')
    self.october_2001_html = season_2001_response.text

    october_2018_response = requests.get(
        'https://www.basketball-reference.com/leagues/NBA_2018_games-october.html')
    self.october_2018_html = october_2018_response.text

    # The handle is kept on the instance and is not closed here.
    # NOTE(review): presumably a tearDown outside this view closes it - confirm.
    module_directory = os.path.dirname(__file__)
    fixture_path = os.path.join(module_directory, './NBA_2019_games-april.html')
    fixture_file = open(fixture_path)
    self.schedule_with_future_games_html_file = fixture_file
    self.schedule_with_future_games_html = fixture_file.read()

    start_times = ScheduledStartTimeParser()
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
    self.parser = ScheduledGamesParser(
        start_time_parser=start_times,
        team_name_parser=team_names,
    )
def season_schedule(season_end_year):
    """Collect the scheduled games of a whole season.

    The season's main schedule page is fetched and parsed first. Every path
    listed in that page's ``other_months_schedule_urls`` is then appended to
    ``BASE_URL``, fetched through ``schedule_for_month`` and added to the
    result.

    :param season_end_year: year inserted into the main schedule page URL
    :return: the value returned by ``parse_games`` for the first page,
        extended in place with each further month's games
    :raises: whatever ``response.raise_for_status()`` raises for an
        unsuccessful HTTP response
    """
    season_url = '{BASE_URL}/leagues/NBA_{season_end_year}_games.html'.format(
        BASE_URL=BASE_URL,
        season_end_year=season_end_year,
    )
    response = requests.get(url=season_url)
    response.raise_for_status()

    document = html.fromstring(html=response.content)
    first_page = SchedulePage(html=document)

    start_times = ScheduledStartTimeParser()
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)
    games_parser = ScheduledGamesParser(
        start_time_parser=start_times,
        team_name_parser=team_names,
    )
    all_games = games_parser.parse_games(games=first_page.rows)

    for relative_path in first_page.other_months_schedule_urls:
        month_url = '{BASE_URL}{month_url_path}'.format(
            BASE_URL=BASE_URL,
            month_url_path=relative_path,
        )
        all_games.extend(schedule_for_month(url=month_url))

    return all_games
def __init__(self):
    """Create every parser held by the service and wire them together.

    Parsers that other parsers depend on are built first and kept in local
    names so the composite parsers further down can share the same
    instances. Constructor calls happen in the same order as before.
    """
    # Building blocks: configured from lookup tables or class-level constants.
    team_abbreviations = TeamAbbreviationParser(
        abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM,
    )
    league_abbreviations = LeagueAbbreviationParser(
        abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
    )
    location_abbreviations = LocationAbbreviationParser(
        abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION,
    )
    outcome_abbreviations = OutcomeAbbreviationParser(
        abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME,
    )
    outcomes = PlayerBoxScoreOutcomeParser(
        outcome_abbreviation_parser=outcome_abbreviations,
    )
    period_details = PeriodDetailsParser(regulation_periods_count=4)
    period_timestamps = PeriodTimestampParser(
        timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT,
    )
    position_abbreviations = PositionAbbreviationParser(
        abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION,
    )
    seconds_played = SecondsPlayedParser()
    scores = ScoresParser(
        scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX,
    )
    search_result_names = SearchResultNameParser()
    search_result_locations = ResourceLocationParser(
        resource_location_regex=ParserService.SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
    )
    team_names = TeamNameParser(team_names_to_teams=TEAM_NAME_TO_TEAM)

    self.team_abbreviation_parser = team_abbreviations
    self.league_abbreviation_parser = league_abbreviations
    self.location_abbreviation_parser = location_abbreviations
    self.outcome_abbreviation_parser = outcome_abbreviations
    self.outcome_parser = outcomes
    self.period_details_parser = period_details
    self.period_timestamp_parser = period_timestamps
    self.position_abbreviation_parser = position_abbreviations
    self.seconds_played_parser = seconds_played
    self.scores_parser = scores
    self.search_result_name_parser = search_result_names
    self.search_result_location_parser = search_result_locations
    self.team_name_parser = team_names

    # Composite parsers: each receives the shared building blocks above.
    self.play_by_plays_parser = PlayByPlaysParser(
        period_details_parser=period_details,
        period_timestamp_parser=period_timestamps,
        scores_parser=scores,
    )
    self.player_box_scores_parser = PlayerBoxScoresParser(
        team_abbreviation_parser=team_abbreviations,
        location_abbreviation_parser=location_abbreviations,
        outcome_abbreviation_parser=outcome_abbreviations,
        seconds_played_parser=seconds_played,
    )
    self.player_data_parser = PlayerDataParser(
        search_result_location_parser=search_result_locations,
        league_abbreviation_parser=league_abbreviations,
    )
    self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
        team_abbreviation_parser=team_abbreviations,
        location_abbreviation_parser=location_abbreviations,
        outcome_parser=outcomes,
        seconds_played_parser=seconds_played,
    )
    self.player_season_totals_parser = PlayerSeasonTotalsParser(
        position_abbreviation_parser=position_abbreviations,
        team_abbreviation_parser=team_abbreviations,
    )
    self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
        team_abbreviation_parser=team_abbreviations,
        position_abbreviation_parser=position_abbreviations,
    )

    scheduled_start_times = ScheduledStartTimeParser()
    self.scheduled_start_time_parser = scheduled_start_times
    self.scheduled_games_parser = ScheduledGamesParser(
        start_time_parser=scheduled_start_times,
        team_name_parser=team_names,
    )
    self.search_results_parser = SearchResultsParser(
        search_result_name_parser=search_result_names,
        search_result_location_parser=search_result_locations,
        league_abbreviation_parser=league_abbreviations,
    )
    self.team_totals_parser = TeamTotalsParser(
        team_abbreviation_parser=team_abbreviations,
    )

    division_names = DivisionNameParser(divisions=Division)
    team_standings = TeamStandingsParser(teams=Team)
    self.division_name_parser = division_names
    self.team_standings_parser = team_standings
    self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
        division_name_parser=division_names,
        team_standings_parser=team_standings,
        divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
    )
class ParserService:
    """Single entry point that owns every parser and exposes `parse_*` helpers.

    All parsers are created once in ``__init__`` and shared between the
    composite parsers that depend on them. Each ``parse_*`` method forwards
    its arguments to the matching parser and returns that parser's result.
    """

    # Passed to PeriodTimestampParser as ``timestamp_format``.
    # NOTE(review): looks like a strptime-style "minutes:seconds.fraction"
    # pattern - confirm against PeriodTimestampParser.
    PLAY_BY_PLAY_TIMESTAMP_FORMAT = "%M:%S.%f"

    # Two hyphen-separated integers captured as ``away_team_score`` and
    # ``home_team_score``.
    PLAY_BY_PLAY_SCORES_REGEX = "(?P<away_team_score>[0-9]+)-(?P<home_team_score>[0-9]+)"

    # Raw string: the pattern contains ``\/`` and ``\.``, which are invalid
    # escape sequences in a normal string literal and trigger a warning at
    # compile time. The resulting pattern text is unchanged.
    SEARCH_RESULT_RESOURCE_LOCATION_REGEX = r'(https?:\/\/www\.basketball-reference\.com\/)?(?P<resource_type>.+?(?=\/)).*\/(?P<resource_identifier>.+).html'

    def __init__(self):
        """Build every parser and wire composite parsers to their collaborators."""
        self.team_abbreviation_parser = TeamAbbreviationParser(
            abbreviations_to_teams=TEAM_ABBREVIATIONS_TO_TEAM,
        )
        self.league_abbreviation_parser = LeagueAbbreviationParser(
            abbreviations_to_league=LEAGUE_ABBREVIATIONS_TO_LEAGUE,
        )
        self.location_abbreviation_parser = LocationAbbreviationParser(
            abbreviations_to_locations=LOCATION_ABBREVIATIONS_TO_POSITION,
        )
        self.outcome_abbreviation_parser = OutcomeAbbreviationParser(
            abbreviations_to_outcomes=OUTCOME_ABBREVIATIONS_TO_OUTCOME,
        )
        self.outcome_parser = PlayerBoxScoreOutcomeParser(
            outcome_abbreviation_parser=self.outcome_abbreviation_parser,
        )
        self.period_details_parser = PeriodDetailsParser(
            regulation_periods_count=4,
        )
        self.period_timestamp_parser = PeriodTimestampParser(
            timestamp_format=ParserService.PLAY_BY_PLAY_TIMESTAMP_FORMAT,
        )
        self.position_abbreviation_parser = PositionAbbreviationParser(
            abbreviations_to_positions=POSITION_ABBREVIATIONS_TO_POSITION,
        )
        self.seconds_played_parser = SecondsPlayedParser()
        self.scores_parser = ScoresParser(
            scores_regex=ParserService.PLAY_BY_PLAY_SCORES_REGEX,
        )
        self.search_result_name_parser = SearchResultNameParser()
        self.search_result_location_parser = ResourceLocationParser(
            resource_location_regex=ParserService.SEARCH_RESULT_RESOURCE_LOCATION_REGEX,
        )
        self.team_name_parser = TeamNameParser(
            team_names_to_teams=TEAM_NAME_TO_TEAM,
        )

        # Composite parsers reuse the instances created above.
        self.play_by_plays_parser = PlayByPlaysParser(
            period_details_parser=self.period_details_parser,
            period_timestamp_parser=self.period_timestamp_parser,
            scores_parser=self.scores_parser,
        )
        self.player_box_scores_parser = PlayerBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_abbreviation_parser=self.outcome_abbreviation_parser,
            seconds_played_parser=self.seconds_played_parser,
        )
        self.player_data_parser = PlayerDataParser(
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.player_season_box_scores_parser = PlayerSeasonBoxScoresParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            location_abbreviation_parser=self.location_abbreviation_parser,
            outcome_parser=self.outcome_parser,
            seconds_played_parser=self.seconds_played_parser,
        )
        self.player_season_totals_parser = PlayerSeasonTotalsParser(
            position_abbreviation_parser=self.position_abbreviation_parser,
            team_abbreviation_parser=self.team_abbreviation_parser,
        )
        self.player_advanced_season_totals_parser = PlayerAdvancedSeasonTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
            position_abbreviation_parser=self.position_abbreviation_parser,
        )
        self.scheduled_start_time_parser = ScheduledStartTimeParser()
        self.scheduled_games_parser = ScheduledGamesParser(
            start_time_parser=self.scheduled_start_time_parser,
            team_name_parser=self.team_name_parser,
        )
        self.search_results_parser = SearchResultsParser(
            search_result_name_parser=self.search_result_name_parser,
            search_result_location_parser=self.search_result_location_parser,
            league_abbreviation_parser=self.league_abbreviation_parser,
        )
        self.team_totals_parser = TeamTotalsParser(
            team_abbreviation_parser=self.team_abbreviation_parser,
        )
        self.division_name_parser = DivisionNameParser(divisions=Division)
        self.team_standings_parser = TeamStandingsParser(teams=Team)
        self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
            division_name_parser=self.division_name_parser,
            team_standings_parser=self.team_standings_parser,
            divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
        )

    def parse_division_standings(self, standings):
        """Forward ``standings`` to the conference/division standings parser."""
        return self.conference_division_standings_parser.parse(
            division_standings=standings,
        )

    def parse_play_by_plays(self, play_by_plays, away_team_name, home_team_name):
        """Parse play-by-play rows after resolving both team names.

        ``away_team_name`` and ``home_team_name`` are first run through the
        team name parser; the results are handed to the play-by-plays parser
        together with ``play_by_plays``.
        """
        return self.play_by_plays_parser.parse(
            play_by_plays=play_by_plays,
            away_team=self.team_name_parser.parse_team_name(
                team_name=away_team_name,
            ),
            home_team=self.team_name_parser.parse_team_name(
                team_name=home_team_name,
            ),
        )

    def parse_player_box_scores(self, box_scores):
        """Forward ``box_scores`` to the player box scores parser."""
        return self.player_box_scores_parser.parse(box_scores=box_scores)

    def parse_player_season_box_scores(self, box_scores):
        """Forward ``box_scores`` to the player season box scores parser."""
        return self.player_season_box_scores_parser.parse(
            box_scores=box_scores,
        )

    def parse_player_advanced_season_totals_parser(self, totals):
        """Forward ``totals`` to the player advanced season totals parser.

        The trailing ``_parser`` in the method name is kept as-is so existing
        callers continue to work.
        """
        return self.player_advanced_season_totals_parser.parse(totals=totals)

    def parse_player_season_totals(self, totals):
        """Forward ``totals`` to the player season totals parser."""
        return self.player_season_totals_parser.parse(totals=totals)

    def parse_scheduled_games(self, games):
        """Forward ``games`` to the scheduled games parser."""
        # Keyword form, matching how every other delegate passes arguments.
        return self.scheduled_games_parser.parse_games(games=games)

    def parse_team_totals(self, first_team_totals, second_team_totals):
        """Forward both teams' totals to the team totals parser."""
        return self.team_totals_parser.parse(
            first_team_totals=first_team_totals,
            second_team_totals=second_team_totals,
        )

    def parse_player_search_results(self, nba_aba_baa_players):
        """Forward ``nba_aba_baa_players`` to the search results parser."""
        return self.search_results_parser.parse(
            nba_aba_baa_players=nba_aba_baa_players,
        )

    def parse_player_data(self, player):
        """Forward ``player`` to the player data parser."""
        return self.player_data_parser.parse(player=player)